From 2d5a1b31799efcd37a57fe4d2f492d8dc2a0a334 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 14 Dec 2016 19:24:36 +0800 Subject: vsock: lookup and setup guest_cid inside vhost_vsock_lock [ Upstream commit 6c083c2b8a0a110cad936bc0a2c089f0d8115175 ] Multi vsocks may setup the same cid at the same time. Signed-off-by: Gao feng Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers/vhost/vsock.c') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 0ec970ca64ce..6bca57896915 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -50,11 +50,10 @@ static u32 vhost_transport_get_local_cid(void) return VHOST_VSOCK_DEFAULT_HOST_CID; } -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) +static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid) { struct vhost_vsock *vsock; - spin_lock_bh(&vhost_vsock_lock); list_for_each_entry(vsock, &vhost_vsock_list, list) { u32 other_cid = vsock->guest_cid; @@ -63,15 +62,24 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) continue; if (other_cid == guest_cid) { - spin_unlock_bh(&vhost_vsock_lock); return vsock; } } - spin_unlock_bh(&vhost_vsock_lock); return NULL; } +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) +{ + struct vhost_vsock *vsock; + + spin_lock_bh(&vhost_vsock_lock); + vsock = __vhost_vsock_get(guest_cid); + spin_unlock_bh(&vhost_vsock_lock); + + return vsock; +} + static void vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct vhost_virtqueue *vq) @@ -607,11 +615,12 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) return -EINVAL; /* Refuse if CID is already in use */ - other = vhost_vsock_get(guest_cid); - if (other && other != vsock) - return -EADDRINUSE; - spin_lock_bh(&vhost_vsock_lock); + other = __vhost_vsock_get(guest_cid); + if (other && other != vsock) { + spin_unlock_bh(&vhost_vsock_lock); + return -EADDRINUSE; + } vsock->guest_cid = guest_cid; spin_unlock_bh(&vhost_vsock_lock); -- cgit v1.2.3 From 569fc4ffb5de8f12fe01759f0b85098b7b9bba8e Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 5 Nov 2018 10:35:47 +0000 Subject: vhost/vsock: fix use-after-free in network stack callers [ Upstream commit 834e772c8db0c6a275d75315d90aba4ebbb1e249 ] If the network stack calls .send_pkt()/.cancel_pkt() during .release(), a struct vhost_vsock use-after-free is possible. This occurs because .release() does not wait for other CPUs to stop using struct vhost_vsock. Switch to an RCU-enabled hashtable (indexed by guest CID) so that .release() can wait for other CPUs by calling synchronize_rcu(). This also eliminates vhost_vsock_lock acquisition in the data path so it could have a positive effect on performance. This is CVE-2018-14625 "kernel: use-after-free Read in vhost_transport_send_pkt". Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+bd391451452fb0b93039@syzkaller.appspotmail.com Reported-by: syzbot+e3e074963495f92a89ed@syzkaller.appspotmail.com Reported-by: syzbot+d5a0a170c5069658b141@syzkaller.appspotmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 57 +++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'drivers/vhost/vsock.c') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6bca57896915..f800f89068db 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "vhost.h" @@ -27,14 +28,14 @@ enum { /* Used to track all the vhost_vsock instances on the system. */ static DEFINE_SPINLOCK(vhost_vsock_lock); -static LIST_HEAD(vhost_vsock_list); +static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); struct vhost_vsock { struct vhost_dev dev; struct vhost_virtqueue vqs[2]; - /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ - struct list_head list; + /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */ + struct hlist_node hash; struct vhost_work send_pkt_work; spinlock_t send_pkt_list_lock; @@ -50,11 +51,14 @@ static u32 vhost_transport_get_local_cid(void) return VHOST_VSOCK_DEFAULT_HOST_CID; } -static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid) +/* Callers that dereference the return value must hold vhost_vsock_lock or the + * RCU read lock. + */ +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) { struct vhost_vsock *vsock; - list_for_each_entry(vsock, &vhost_vsock_list, list) { + hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) { u32 other_cid = vsock->guest_cid; /* Skip instances that have no CID yet */ @@ -69,17 +73,6 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid) return NULL; } -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -{ - struct vhost_vsock *vsock; - - spin_lock_bh(&vhost_vsock_lock); - vsock = __vhost_vsock_get(guest_cid); - spin_unlock_bh(&vhost_vsock_lock); - - return vsock; -} - static void vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct vhost_virtqueue *vq) @@ -206,9 +199,12 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) struct vhost_virtqueue *vq; int len = pkt->len; + rcu_read_lock(); + /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); if (!vsock) { + rcu_read_unlock(); virtio_transport_free_pkt(pkt); return -ENODEV; } @@ -223,6 +219,8 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) spin_unlock_bh(&vsock->send_pkt_list_lock); vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + + rcu_read_unlock(); return len; } @@ -232,12 +230,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) struct vhost_vsock *vsock; struct virtio_vsock_pkt *pkt, *n; int cnt = 0; + int ret = -ENODEV; LIST_HEAD(freeme); + rcu_read_lock(); + /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); if (!vsock) - return -ENODEV; + goto out; spin_lock_bh(&vsock->send_pkt_list_lock); list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { @@ -263,7 +264,10 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) vhost_poll_queue(&tx_vq->poll); } - return 0; + ret = 0; +out: + rcu_read_unlock(); + return ret; } static struct virtio_vsock_pkt * @@ -529,10 +533,6 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) spin_lock_init(&vsock->send_pkt_list_lock); INIT_LIST_HEAD(&vsock->send_pkt_list); vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); - - spin_lock_bh(&vhost_vsock_lock); - list_add_tail(&vsock->list, &vhost_vsock_list); - spin_unlock_bh(&vhost_vsock_lock); return 0; out: @@ -573,9 +573,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) struct vhost_vsock *vsock = file->private_data; spin_lock_bh(&vhost_vsock_lock); - list_del(&vsock->list); + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); spin_unlock_bh(&vhost_vsock_lock); + /* Wait for other CPUs to finish using vsock */ + synchronize_rcu(); + /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. */ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); @@ -616,12 +620,17 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) /* Refuse if CID is already in use */ spin_lock_bh(&vhost_vsock_lock); - other = __vhost_vsock_get(guest_cid); + other = vhost_vsock_get(guest_cid); if (other && other != vsock) { spin_unlock_bh(&vhost_vsock_lock); return -EADDRINUSE; } + + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); + vsock->guest_cid = guest_cid; + hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); spin_unlock_bh(&vhost_vsock_lock); return 0; -- cgit v1.2.3 From 06ec6679fe12cacafce68ab7b509586482a2ae1b Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 6 Dec 2018 19:14:34 +0000 Subject: vhost/vsock: fix reset orphans race with close timeout [ Upstream commit c38f57da428b033f2721b611d84b1f40bde674a8 ] If a local process has closed a connected socket and hasn't received a RST packet yet, then the socket remains in the table until a timeout expires. When a vhost_vsock instance is released with the timeout still pending, the socket is never freed because vhost_vsock has already set the SOCK_DONE flag. Check if the close timer is pending and let it close the socket. This prevents the race which can leak sockets. Reported-by: Maximilian Riemensberger Cc: Graham Whaley Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/vhost/vsock.c') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index f800f89068db..46f966d7c328 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -559,13 +559,21 @@ static void vhost_vsock_reset_orphans(struct sock *sk) * executing. */ - if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) { - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - sk->sk_state = SS_UNCONNECTED; - sk->sk_err = ECONNRESET; - sk->sk_error_report(sk); - } + /* If the peer is still valid, no need to reset connection */ + if (vhost_vsock_get(vsk->remote_addr.svm_cid)) + return; + + /* If the close timeout is pending, let it expire. This avoids races + * with the timeout callback. + */ + if (vsk->close_work_scheduled) + return; + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); } static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -- cgit v1.2.3 From 258d8549b55e2cd5d73d9ff3d18b39aeeab08e1f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Nov 2017 13:29:10 +0000 Subject: vhost/vsock: fix uninitialized vhost_vsock->guest_cid commit a72b69dc083a931422cc8a5e33841aff7d5312f2 upstream. The vhost_vsock->guest_cid field is uninitialized when /dev/vhost-vsock is opened until the VHOST_VSOCK_SET_GUEST_CID ioctl is called. kvmalloc(..., GFP_KERNEL | __GFP_RETRY_MAYFAIL) does not zero memory. All other vhost_vsock fields are initialized explicitly so just initialize this field too. Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Cc: Daniel Verkamp Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/vhost/vsock.c') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 46f966d7c328..72e914de473e 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -520,6 +520,8 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) goto out; } + vsock->guest_cid = 0; /* no CID assigned yet */ + atomic_set(&vsock->queued_replies, 0); vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; -- cgit v1.2.3 From 5ebcee949fa04456053207541a0cbe51b42e552f Mon Sep 17 00:00:00 2001 From: Zha Bin Date: Tue, 8 Jan 2019 16:07:03 +0800 Subject: vhost/vsock: fix vhost vsock cid hashing inconsistent commit 7fbe078c37aba3088359c9256c1a1d0c3e39ee81 upstream. The vsock core only supports 32bit CID, but the Virtio-vsock spec define CID (dst_cid and src_cid) as u64 and the upper 32bits is reserved as zero. This inconsistency causes one bug in vhost vsock driver. The scenarios is: 0. A hash table (vhost_vsock_hash) is used to map an CID to a vsock object. And hash_min() is used to compute the hash key. hash_min() is defined as: (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)). That means the hash algorithm has dependency on the size of macro argument 'val'. 0. In function vhost_vsock_set_cid(), a 64bit CID is passed to hash_min() to compute the hash key when inserting a vsock object into the hash table. 0. In function vhost_vsock_get(), a 32bit CID is passed to hash_min() to compute the hash key when looking up a vsock for an CID. Because the different size of the CID, hash_min() returns different hash key, thus fails to look up the vsock object for an CID. To fix this bug, we keep CID as u64 in the IOCTLs and virtio message headers, but explicitly convert u64 to u32 when deal with the hash table and vsock core. Fixes: 834e772c8db0 ("vhost/vsock: fix use-after-free in network stack callers") Link: https://github.com/stefanha/virtio/blob/vsock/trunk/content.tex Signed-off-by: Zha Bin Reviewed-by: Liu Jiang Reviewed-by: Stefan Hajnoczi Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Shengjing Zhu Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vhost/vsock.c') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 72e914de473e..3cefd602b5b1 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -640,7 +640,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) hash_del_rcu(&vsock->hash); vsock->guest_cid = guest_cid; - hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); + hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid); spin_unlock_bh(&vhost_vsock_lock); return 0; -- cgit v1.2.3