summaryrefslogtreecommitdiff
path: root/net/vmw_vsock/hyperv_transport.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/vmw_vsock/hyperv_transport.c')
-rw-r--r--net/vmw_vsock/hyperv_transport.c170
1 files changed, 101 insertions, 69 deletions
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 2c63f7b169b5..6614512f8180 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -35,6 +35,9 @@
/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)
+/* How long to wait for graceful shutdown of a connection */
+#define HVS_CLOSE_TIMEOUT (8 * HZ)
+
struct vmpipe_proto_header {
u32 pkt_type;
u32 data_size;
@@ -217,18 +220,6 @@ static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
set_channel_pending_send_size(chan,
HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
- /* See hvs_stream_has_space(): we must make sure the host has seen
- * the new pending send size, before we can re-check the writable
- * bytes.
- */
- virt_mb();
-}
-
-static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
-{
- set_channel_pending_send_size(chan, 0);
-
- /* Ditto */
virt_mb();
}
@@ -298,27 +289,42 @@ static void hvs_channel_cb(void *ctx)
if (hvs_channel_readable(chan))
sk->sk_data_ready(sk);
- /* See hvs_stream_has_space(): when we reach here, the writable bytes
- * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
- */
if (hv_get_bytes_to_write(&chan->outbound) > 0)
sk->sk_write_space(sk);
}
-static void hvs_close_connection(struct vmbus_channel *chan)
+static void hvs_do_close_lock_held(struct vsock_sock *vsk,
+ bool cancel_timeout)
{
- struct sock *sk = get_per_channel_state(chan);
- struct vsock_sock *vsk = vsock_sk(sk);
-
- lock_sock(sk);
+ struct sock *sk = sk_vsock(vsk);
- sk->sk_state = SS_UNCONNECTED;
sock_set_flag(sk, SOCK_DONE);
- vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
-
+ vsk->peer_shutdown = SHUTDOWN_MASK;
+ if (vsock_stream_has_data(vsk) <= 0)
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
+ if (vsk->close_work_scheduled &&
+ (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
+ vsk->close_work_scheduled = false;
+ vsock_remove_sock(vsk);
+ /* Release the reference taken while scheduling the timeout */
+ sock_put(sk);
+ }
+}
+
+static void hvs_close_connection(struct vmbus_channel *chan)
+{
+ struct sock *sk = get_per_channel_state(chan);
+
+ lock_sock(sk);
+ hvs_do_close_lock_held(vsock_sk(sk), true);
release_sock(sk);
+
+ /* Release the refcnt for the channel that's opened in
+ * hvs_open_connection().
+ */
+ sock_put(sk);
}
static void hvs_open_connection(struct vmbus_channel *chan)
@@ -328,8 +334,9 @@ static void hvs_open_connection(struct vmbus_channel *chan)
struct sockaddr_vm addr;
struct sock *sk, *new = NULL;
- struct vsock_sock *vnew;
- struct hvsock *hvs, *hvs_new;
+ struct vsock_sock *vnew = NULL;
+ struct hvsock *hvs = NULL;
+ struct hvsock *hvs_new = NULL;
int ret;
if_type = &chan->offermsg.offer.if_type;
@@ -350,8 +357,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
lock_sock(sk);
- if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) ||
- (!conn_from_host && sk->sk_state != SS_CONNECTING))
+ if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
+ (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
goto out;
if (conn_from_host) {
@@ -363,7 +370,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if (!new)
goto out;
- new->sk_state = SS_CONNECTING;
+ new->sk_state = TCP_SYN_SENT;
vnew = vsock_sk(new);
hvs_new = vnew->trans;
hvs_new->chan = chan;
@@ -387,10 +394,20 @@ static void hvs_open_connection(struct vmbus_channel *chan)
}
set_per_channel_state(chan, conn_from_host ? new : sk);
+
+ /* This reference will be dropped by hvs_close_connection(). */
+ sock_hold(conn_from_host ? new : sk);
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
+ /* Set the pending send size to max packet size to always get
+ * notifications from the host when there is enough writable space.
+ * The host is optimized to send notifications only when the pending
+ * size boundary is crossed, and not always.
+ */
+ hvs_set_channel_pending_send_size(chan);
+
if (conn_from_host) {
- new->sk_state = SS_CONNECTED;
+ new->sk_state = TCP_ESTABLISHED;
sk->sk_ack_backlog++;
hvs_addr_init(&vnew->local_addr, if_type);
@@ -403,7 +420,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
vsock_enqueue_accept(sk, new);
} else {
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsock_sk(sk));
@@ -453,50 +470,80 @@ static int hvs_connect(struct vsock_sock *vsk)
return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
}
+static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
+{
+ struct vmpipe_proto_header hdr;
+
+ if (hvs->fin_sent || !hvs->chan)
+ return;
+
+ /* It can't fail: see hvs_channel_writable_bytes(). */
+ (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
+ hvs->fin_sent = true;
+}
+
static int hvs_shutdown(struct vsock_sock *vsk, int mode)
{
struct sock *sk = sk_vsock(vsk);
- struct vmpipe_proto_header hdr;
- struct hvs_send_buf *send_buf;
- struct hvsock *hvs;
if (!(mode & SEND_SHUTDOWN))
return 0;
lock_sock(sk);
+ hvs_shutdown_lock_held(vsk->trans, mode);
+ release_sock(sk);
+ return 0;
+}
- hvs = vsk->trans;
- if (hvs->fin_sent)
- goto out;
-
- send_buf = (struct hvs_send_buf *)&hdr;
+static void hvs_close_timeout(struct work_struct *work)
+{
+ struct vsock_sock *vsk =
+ container_of(work, struct vsock_sock, close_work.work);
+ struct sock *sk = sk_vsock(vsk);
- /* It can't fail: see hvs_channel_writable_bytes(). */
- (void)hvs_send_data(hvs->chan, send_buf, 0);
+ sock_hold(sk);
+ lock_sock(sk);
+ if (!sock_flag(sk, SOCK_DONE))
+ hvs_do_close_lock_held(vsk, false);
- hvs->fin_sent = true;
-out:
+ vsk->close_work_scheduled = false;
release_sock(sk);
- return 0;
+ sock_put(sk);
}
-static void hvs_release(struct vsock_sock *vsk)
+/* Returns true, if it is safe to remove socket; false otherwise */
+static bool hvs_close_lock_held(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
- struct hvsock *hvs = vsk->trans;
- struct vmbus_channel *chan;
- lock_sock(sk);
+ if (!(sk->sk_state == TCP_ESTABLISHED ||
+ sk->sk_state == TCP_CLOSING))
+ return true;
- sk->sk_state = TCP_CLOSING;
- vsock_remove_sock(vsk);
+ if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
+ hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);
- release_sock(sk);
+ if (sock_flag(sk, SOCK_DONE))
+ return true;
- chan = hvs->chan;
- if (chan)
- hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+ /* This reference will be dropped by the delayed close routine */
+ sock_hold(sk);
+ INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
+ vsk->close_work_scheduled = true;
+ schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
+ return false;
+}
+static void hvs_release(struct vsock_sock *vsk)
+{
+ struct sock *sk = sk_vsock(vsk);
+ bool remove_sock;
+
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ remove_sock = hvs_close_lock_held(vsk);
+ release_sock(sk);
+ if (remove_sock)
+ vsock_remove_sock(vsk);
}
static void hvs_destruct(struct vsock_sock *vsk)
@@ -652,23 +699,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk)
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
struct hvsock *hvs = vsk->trans;
- struct vmbus_channel *chan = hvs->chan;
- s64 ret;
- ret = hvs_channel_writable_bytes(chan);
- if (ret > 0) {
- hvs_clear_channel_pending_send_size(chan);
- } else {
- /* See hvs_channel_cb() */
- hvs_set_channel_pending_send_size(chan);
-
- /* Re-check the writable bytes to avoid race */
- ret = hvs_channel_writable_bytes(chan);
- if (ret > 0)
- hvs_clear_channel_pending_send_size(chan);
- }
-
- return ret;
+ return hvs_channel_writable_bytes(hvs->chan);
}
static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)