[PATCH] nbd: Fix races introduced by nbd_index_mutex scope reduction

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch fixes oversights in "nbd: refactor device search and allocation
in nbd_genl_connect" and "nbd: reduce the nbd_index_mutex scope".

Previously nbd_index_mutex was held during whole add/remove/lookup
operations in order to guarantee that partially initialized devices are
not reachable via idr_find() or idr_for_each(). But now that partially
initialized devices become reachable as soon as idr_alloc() succeeds,
we need to skip partially initialized devices. Since it seems that
all functions use refcount_inc_not_zero(&nbd->refs) in order to skip
destroying devices, update nbd->refs from zero to non-zero as the last
step of device initialization in order to also skip partially initialized
devices. Also, update nbd->index before releasing nbd_index_mutex, for
populate_nbd_status() might access nbd->index as soon as nbd_index_mutex
is released. Error messages which assume that nbd->refs == 0 as "going
down" might be no longer accurate, but this patch does not update them.

Use of per "struct nbd_device" completion is not thread-safe.
Since nbd_index_mutex is released before calling wait_for_completion(),
when multiple threads concurrently reached wait_for_completion(), only
one thread which set nbd->destroy_complete for the last time is woken up
by complete() and all other threads will hang with uninterruptible state.
Use global completion with short timeout, for extra wake up results in
harmless retries. But nbd must be reset to NULL after "goto again;", or
use-after-free access will happen if idr_for_each_entry() did not find
any initialized free device.

Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
---
Only compile tested. This patch is a hint for Christoph Hellwig
who needs to know what the global mutex was serializing...

 drivers/block/nbd.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index c5e2b4cd697f..4580016eca44 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -51,6 +51,7 @@ static DEFINE_IDR(nbd_index_idr);
 static DEFINE_MUTEX(nbd_index_mutex);
 static struct workqueue_struct *nbd_del_wq;
 static int nbd_total_devices = 0;
+static DECLARE_COMPLETION(nbd_destroy_complete);
 
 struct nbd_sock {
 	struct socket *sock;
@@ -120,7 +121,6 @@ struct nbd_device {
 	struct task_struct *task_recv;
 	struct task_struct *task_setup;
 
-	struct completion *destroy_complete;
 	unsigned long flags;
 
 	char *backend;
@@ -243,9 +243,10 @@ static const struct device_attribute backend_attr = {
  */
 static void nbd_notify_destroy_completion(struct nbd_device *nbd)
 {
-	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
-	    nbd->destroy_complete)
-		complete(nbd->destroy_complete);
+	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags)) {
+		complete_all(&nbd_destroy_complete);
+		reinit_completion(&nbd_destroy_complete);
+	}
 }
 
 static void nbd_dev_remove(struct nbd_device *nbd)
@@ -1706,7 +1707,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 		BLK_MQ_F_BLOCKING;
 	nbd->tag_set.driver_data = nbd;
 	INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
-	nbd->destroy_complete = NULL;
 	nbd->backend = NULL;
 
 	err = blk_mq_alloc_tag_set(&nbd->tag_set);
@@ -1724,10 +1724,10 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 		if (err >= 0)
 			index = err;
 	}
+	nbd->index = index;
 	mutex_unlock(&nbd_index_mutex);
 	if (err < 0)
 		goto out_free_tags;
-	nbd->index = index;
 
 	disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
 	if (IS_ERR(disk)) {
@@ -1751,7 +1751,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 
 	mutex_init(&nbd->config_lock);
 	refcount_set(&nbd->config_refs, 0);
-	refcount_set(&nbd->refs, refs);
 	INIT_LIST_HEAD(&nbd->list);
 	disk->major = NBD_MAJOR;
 
@@ -1770,11 +1769,14 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 	disk->private_data = nbd;
 	sprintf(disk->disk_name, "nbd%d", index);
 	add_disk(disk);
+	refcount_set(&nbd->refs, refs);
 	nbd_total_devices++;
 	return nbd;
 
 out_free_idr:
+	mutex_lock(&nbd_index_mutex);
 	idr_remove(&nbd_index_idr, index);
+	mutex_unlock(&nbd_index_mutex);
 out_free_tags:
 	blk_mq_free_tag_set(&nbd->tag_set);
 out_free_nbd:
@@ -1829,8 +1831,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
 
 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 {
-	DECLARE_COMPLETION_ONSTACK(destroy_complete);
-	struct nbd_device *nbd = NULL;
+	struct nbd_device *nbd;
 	struct nbd_config *config;
 	int index = -1;
 	int ret;
@@ -1855,8 +1856,10 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 		struct nbd_device *tmp;
 		int id;
 
+		nbd = NULL;
 		idr_for_each_entry(&nbd_index_idr, tmp, id) {
-			if (!refcount_read(&tmp->config_refs)) {
+			if (!refcount_read(&tmp->config_refs) &&
+			    refcount_read(&tmp->refs)) {
 				nbd = tmp;
 				break;
 			}
@@ -1868,11 +1871,10 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 	if (nbd) {
 		if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
 		    test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
-			nbd->destroy_complete = &destroy_complete;
 			mutex_unlock(&nbd_index_mutex);
 
 			/* wait until the nbd device is completely destroyed */
-			wait_for_completion(&destroy_complete);
+			wait_for_completion_timeout(&nbd_destroy_complete, HZ / 10);
 			goto again;
 		}
 
-- 
2.18.4





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux