This fixes a regression added with:

commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4
Author: Mike Christie <mchristi@xxxxxxxxxx>
Date:   Sun Aug 4 14:10:06 2019 -0500

    nbd: fix max number of supported devs

Before that patch, if we got a signal in the NBD_DO_IT ioctl, we would
call sock_shutdown and then return immediately. The patch added a
flush_workqueue call in that error-handler path, so we now wait for
recv_work to complete.

The problem is that some sockets do not implement a shutdown callout,
so when sock_shutdown is called the flush_workqueue call gets stuck
waiting for the workqueue to break out of the sock_recvmsg call.

This patch just moves the creation/destruction of the recv workqueue
to the nbd_device's lifetime and removes the flush_workqueue call, so
we get the behavior we had before: when using the ioctl interface and
we get a signal, the recv work is left running until module removal
time, when the devices are removed.

For the next kernel, I think we can do something more invasive, like
switching from workqueues to threads and using signals to break out of
the recvmsg call (we had recently removed the last signal use for
socket shutdown, so I was not sure if we wanted to do this), or
figuring out the list of socket families nbd supports and implementing
shutdown functions for them (I did not get a reply to
https://lkml.org/lkml/2019/10/1/1323, so I'm assuming no one really
knows, and I am still digging into that).

Reported-by: syzbot+24c12fa8d218ed26011a@xxxxxxxxxxxxxxxxxxxxxxxxx
Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs")
Signed-off-by: Mike Christie <mchristi@xxxxxxxxxx>
---
Two illustrative notes are appended after the patch: one on why the
flush can hang forever, and a rough sketch of the thread-based
alternative mentioned above.

 drivers/block/nbd.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ac07e8c94c79..ee40799712d4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -222,6 +222,8 @@ static void nbd_dev_remove(struct nbd_device *nbd)
 	struct gendisk *disk = nbd->disk;
 	struct request_queue *q;
 
+	destroy_workqueue(nbd->recv_workq);
+
 	if (disk) {
 		q = disk->queue;
 		del_gendisk(disk);
@@ -1177,10 +1180,6 @@ static void nbd_config_put(struct nbd_device *nbd)
 		kfree(nbd->config);
 		nbd->config = NULL;
 
-		if (nbd->recv_workq)
-			destroy_workqueue(nbd->recv_workq);
-		nbd->recv_workq = NULL;
-
 		nbd->tag_set.timeout = 0;
 		nbd->disk->queue->limits.discard_granularity = 0;
 		nbd->disk->queue->limits.discard_alignment = 0;
@@ -1209,14 +1208,6 @@ static int nbd_start_device(struct nbd_device *nbd)
 		return -EINVAL;
 	}
 
-	nbd->recv_workq = alloc_workqueue("knbd%d-recv",
-					  WQ_MEM_RECLAIM | WQ_HIGHPRI |
-					  WQ_UNBOUND, 0, nbd->index);
-	if (!nbd->recv_workq) {
-		dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
-		return -ENOMEM;
-	}
-
 	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
 	nbd->task_recv = current;
 
@@ -1267,10 +1258,8 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
 	mutex_unlock(&nbd->config_lock);
 	ret = wait_event_interruptible(config->recv_wq,
 				       atomic_read(&config->recv_threads) == 0);
-	if (ret) {
+	if (ret)
 		sock_shutdown(nbd);
-		flush_workqueue(nbd->recv_workq);
-	}
 	mutex_lock(&nbd->config_lock);
 	nbd_bdev_reset(bdev);
 	/* user requested, ignore socket errors */
@@ -1656,9 +1645,17 @@ static int nbd_dev_add(int index)
 	nbd->tag_set.driver_data = nbd;
 	nbd->destroy_complete = NULL;
 
+	nbd->recv_workq = alloc_workqueue("knbd%d-recv",
+					  WQ_MEM_RECLAIM | WQ_HIGHPRI |
+					  WQ_UNBOUND, 0, nbd->index);
+	if (!nbd->recv_workq) {
+		dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
+		goto out_free_idr;
+	}
+
 	err = blk_mq_alloc_tag_set(&nbd->tag_set);
 	if (err)
-		goto out_free_idr;
+		goto out_free_wq;
 
 	q = blk_mq_init_queue(&nbd->tag_set);
 	if (IS_ERR(q)) {
@@ -1695,6 +1692,8 @@ static int nbd_dev_add(int index)
 
 out_free_tags:
 	blk_mq_free_tag_set(&nbd->tag_set);
+out_free_wq:
+	destroy_workqueue(nbd->recv_workq);
 out_free_idr:
 	idr_remove(&nbd_index_idr, index);
 out_free_disk:
@@ -1949,7 +1948,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
 	mutex_unlock(&nbd->config_lock);
 	/*
 	 * Make sure recv thread has finished, so it does not drop the last
-	 * config ref and try to destroy the workqueue from inside the work
+	 * nbd_device ref and try to destroy the workqueue from inside the work
 	 * queue.
 	 */
 	flush_workqueue(nbd->recv_workq);
-- 
2.20.1
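
Note for reviewers (not part of the patch): the reason the
flush_workqueue call can hang forever is that nbd's sock_shutdown ends
up in kernel_sock_shutdown, which just invokes the socket's ->shutdown
op. Families that do not implement shutdown plug in sock_no_shutdown,
so nothing forces the recv work out of its blocking sock_recvmsg and
the flush never completes. For reference, the two functions in
net/socket.c and net/core/sock.c are essentially:

int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
	/* A no-op for families whose ops point at sock_no_shutdown. */
	return sock->ops->shutdown(sock, how);
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}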
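
Note for reviewers (not part of the patch): to make the thread-based
idea from the changelog concrete, here is a rough sketch. The
nbd->recv_task field and the nbd_start_recv/nbd_stop_recv/
nbd_read_reply helpers are made-up names for illustration; the point
is that a kthread which allows SIGKILL can be knocked out of a
blocking sock_recvmsg by a signal, regardless of whether the socket
family implements ->shutdown:

/* Sketch only; would need <linux/kthread.h> and <linux/sched/signal.h>. */
static int nbd_recv_thread(void *data)
{
	struct nbd_device *nbd = data;

	/* Let SIGKILL through so a signal can break us out of recvmsg. */
	allow_signal(SIGKILL);

	/* Read replies until a signal or a socket error knocks us out. */
	while (!kthread_should_stop() && nbd_read_reply(nbd) >= 0)
		;

	/* Do not exit before kthread_stop, so the task stays reapable. */
	while (!kthread_should_stop()) {
		flush_signals(current);
		set_current_state(TASK_INTERRUPTIBLE);
		if (!kthread_should_stop())
			schedule();
		__set_current_state(TASK_RUNNING);
	}
	return 0;
}

static int nbd_start_recv(struct nbd_device *nbd)
{
	struct task_struct *task;

	task = kthread_run(nbd_recv_thread, nbd, "knbd%d-recv", nbd->index);
	if (IS_ERR(task))
		return PTR_ERR(task);
	nbd->recv_task = task;
	return 0;
}

static void nbd_stop_recv(struct nbd_device *nbd)
{
	send_sig(SIGKILL, nbd->recv_task, 1);	/* unblocks sock_recvmsg */
	kthread_stop(nbd->recv_task);		/* then reap the thread */
}

With something like this, the NBD_DO_IT error path could signal and
stop the thread instead of flushing a workqueue that may never drain.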