[PATCH] nbd: fix hang in NBD_DO_IT ioctl error handling

Mike Christie <mchristi@xxxxxxxxxx> · Mon, 14 Oct 2019 01:36:10 -0500

This fixes a regression added with:

commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4
Author: Mike Christie <mchristi@xxxxxxxxxx>
Date:   Sun Aug 4 14:10:06 2019 -0500

    nbd: fix max number of supported devs

Before the patch, if we got a signal in the NBD_DO_IT ioctl, we would
call sock_shutdown then return immediately. The patch added a
flush_workqueue call in that error handler path, so we now wait for
the recv_work to complete. The problem is that some sockets do not
implemnent a shutdown callout, so when sock_shutdown is called the
flush_workqueue call is stuck waiting for the workqueue to break out
of the sock_recvmsg call.

This patch just moves where we create/destroy the recv workqueue to the
nbd_device and removes the flush_workqueue call, so we have the behavior
we had before where when using the ioctl interface and we get a signal the
recv works will be left running until module removal time when the devices
are removed.

For the next kernel, I think we can do something more invasive like switch
from workqueues to threads and use signals to break out of the recvmsg call
(we had recently removed the last signal use for socket shutdown so I was
not sure if we wanted to do this), or figure out a list of sockets families
nbd supports and implement shutdown functions for them (I did not get a reply
for that here https://lkml.org/lkml/2019/10/1/1323 so I'm assuming no one
really knows and and I am still digging into that).

Reported-by: syzbot+24c12fa8d218ed26011a@xxxxxxxxxxxxxxxxxxxxxxxxx
Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") 
Signed-off-by: Mike Christie <mchristi@xxxxxxxxxx>

---
 drivers/block/nbd.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ac07e8c94c79..ee40799712d4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -222,6 +222,8 @@ static void nbd_dev_remove(struct nbd_device *nbd)
 	struct gendisk *disk = nbd->disk;
 	struct request_queue *q;
 
+	destroy_workqueue(nbd->recv_workq);
+
 	if (disk) {
 		q = disk->queue;
 		del_gendisk(disk);
@@ -1177,10 +1180,6 @@ static void nbd_config_put(struct nbd_device *nbd)
 		kfree(nbd->config);
 		nbd->config = NULL;
 
-		if (nbd->recv_workq)
-			destroy_workqueue(nbd->recv_workq);
-		nbd->recv_workq = NULL;
-
 		nbd->tag_set.timeout = 0;
 		nbd->disk->queue->limits.discard_granularity = 0;
 		nbd->disk->queue->limits.discard_alignment = 0;
@@ -1209,14 +1208,6 @@ static int nbd_start_device(struct nbd_device *nbd)
 		return -EINVAL;
 	}
 
-	nbd->recv_workq = alloc_workqueue("knbd%d-recv",
-					  WQ_MEM_RECLAIM | WQ_HIGHPRI |
-					  WQ_UNBOUND, 0, nbd->index);
-	if (!nbd->recv_workq) {
-		dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
-		return -ENOMEM;
-	}
-
 	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
 	nbd->task_recv = current;
 
@@ -1267,10 +1258,8 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
 	mutex_unlock(&nbd->config_lock);
 	ret = wait_event_interruptible(config->recv_wq,
 					 atomic_read(&config->recv_threads) == 0);
-	if (ret) {
+	if (ret)
 		sock_shutdown(nbd);
-		flush_workqueue(nbd->recv_workq);
-	}
 	mutex_lock(&nbd->config_lock);
 	nbd_bdev_reset(bdev);
 	/* user requested, ignore socket errors */
@@ -1656,9 +1645,17 @@ static int nbd_dev_add(int index)
 	nbd->tag_set.driver_data = nbd;
 	nbd->destroy_complete = NULL;
 
+	nbd->recv_workq = alloc_workqueue("knbd%d-recv",
+					  WQ_MEM_RECLAIM | WQ_HIGHPRI |
+					  WQ_UNBOUND, 0, nbd->index);
+	if (!nbd->recv_workq) {
+		dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
+		goto out_free_idr;
+	}
+
 	err = blk_mq_alloc_tag_set(&nbd->tag_set);
 	if (err)
-		goto out_free_idr;
+		goto out_free_wq;
 
 	q = blk_mq_init_queue(&nbd->tag_set);
 	if (IS_ERR(q)) {
@@ -1695,6 +1692,8 @@ static int nbd_dev_add(int index)
 
 out_free_tags:
 	blk_mq_free_tag_set(&nbd->tag_set);
+out_free_wq:
+	destroy_workqueue(nbd->recv_workq);
 out_free_idr:
 	idr_remove(&nbd_index_idr, index);
 out_free_disk:
@@ -1949,7 +1948,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
 	mutex_unlock(&nbd->config_lock);
 	/*
 	 * Make sure recv thread has finished, so it does not drop the last
-	 * config ref and try to destroy the workqueue from inside the work
+	 * nbd_device ref and try to destroy the workqueue from inside the work
 	 * queue.
 	 */
 	flush_workqueue(nbd->recv_workq);
-- 
2.20.1