On 2018/11/2 4:15 AM, Stefan Hajnoczi wrote:
If the network stack calls .send_pkt()/.cancel_pkt() during .release(),
a struct vhost_vsock use-after-free is possible. This occurs because
.release() does not wait for other CPUs to stop using struct
vhost_vsock.
Introduce a refcount for network stack callers in struct vhost_vsock and
wake up .release() when the refcount reaches zero.
Reported-and-tested-by: syzbot+bd391451452fb0b93039@xxxxxxxxxxxxxxxxxxxxxxxxx
Reported-by: syzbot+e3e074963495f92a89ed@xxxxxxxxxxxxxxxxxxxxxxxxx
Reported-by: syzbot+d5a0a170c5069658b141@xxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
---
Here is a version that avoids unnecessary wake_up() calls and passes
syzbot. I'm happy with this fix now.
drivers/vhost/vsock.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 34bc3ab40c6d..8daffbc4013a 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -9,6 +9,7 @@
*/
#include <linux/miscdevice.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
@@ -42,6 +43,12 @@ struct vhost_vsock {
atomic_t queued_replies;
+ /* For staying alive while there are network stack
+ * .send_pkt()/.cancel_pkt() callers.
+ */
+ refcount_t net_users;
+ wait_queue_head_t net_users_wq;
+
u32 guest_cid;
};
@@ -75,6 +82,8 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
spin_lock_bh(&vhost_vsock_lock);
vsock = __vhost_vsock_get(guest_cid);
+ if (vsock)
+ refcount_inc(&vsock->net_users);
spin_unlock_bh(&vhost_vsock_lock);
This adds one more atomic operation on the datapath compared to my patch,
and the global spinlock is still taken there.
I think it's better to use hlist + RCU instead of a refcount; that way the
datapath avoids both the spinlock and the refcount.
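Roughly something like the following, just to show the shape of the idea.
This is an untested sketch: vhost_vsock_hash and the ->hash member are
invented names for illustration, not code from this patch.

/* Lookup side: the caller holds rcu_read_lock() across the lookup and
 * the use of the returned vsock, so the datapath needs neither the
 * global spinlock nor a refcount.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
	struct vhost_vsock *vsock;

	hlist_for_each_entry_rcu(vsock, &vhost_vsock_hash, hash)
		if (vsock->guest_cid == guest_cid)
			return vsock;

	return NULL;
}

/* .release() unhashes the instance and waits for readers before the
 * usual teardown:
 */
static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
{
	struct vhost_vsock *vsock = file->private_data;

	spin_lock_bh(&vhost_vsock_lock);
	hlist_del_init_rcu(&vsock->hash);
	spin_unlock_bh(&vhost_vsock_lock);

	/* Wait for in-flight .send_pkt()/.cancel_pkt() callers. */
	synchronize_rcu();

	/* ... existing flush/teardown stays as it is ... */
	return 0;
}

vhost_transport_send_pkt()/vhost_transport_cancel_pkt() would then bracket
the lookup and the use of the returned vsock with
rcu_read_lock()/rcu_read_unlock() instead of touching net_users.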
Thanks
return vsock;
@@ -225,6 +234,10 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
spin_unlock_bh(&vsock->send_pkt_list_lock);
vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+
+ if (refcount_dec_and_test(&vsock->net_users))
+ wake_up(&vsock->net_users_wq);
+
return len;
}
@@ -265,6 +278,9 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
vhost_poll_queue(&tx_vq->poll);
}
+ if (refcount_dec_and_test(&vsock->net_users))
+ wake_up(&vsock->net_users_wq);
+
return 0;
}
@@ -521,6 +537,8 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
vsock->guest_cid = 0; /* no CID assigned yet */
atomic_set(&vsock->queued_replies, 0);
+ refcount_set(&vsock->net_users, 1);
+ init_waitqueue_head(&vsock->net_users_wq);
vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
@@ -557,13 +575,17 @@ static void vhost_vsock_flush(struct vhost_vsock *vsock)
static void vhost_vsock_reset_orphans(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ bool orphan;
+
+ spin_lock_bh(&vhost_vsock_lock);
+ orphan = __vhost_vsock_get(vsk->remote_addr.svm_cid) == NULL;
+ spin_unlock_bh(&vhost_vsock_lock);
/* vmci_transport.c doesn't take sk_lock here either. At least we're
* under vsock_table_lock so the sock cannot disappear while we're
* executing.
*/
-
- if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) {
+ if (orphan) {
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
sk->sk_state = SS_UNCONNECTED;
@@ -580,6 +602,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
list_del(&vsock->list);
spin_unlock_bh(&vhost_vsock_lock);
+ /* Now that the vsock instance is no longer visible, wait for other
+ * CPUs to drop their references.
+ */
+ if (!refcount_dec_and_test(&vsock->net_users))
+ wait_event(vsock->net_users_wq,
+ refcount_read(&vsock->net_users) == 0);
+
/* Iterating over all connections for all CIDs to find orphans is
* inefficient. Room for improvement here. */
vsock_for_each_connected_socket(vhost_vsock_reset_orphans);