[PATCH RFC] loop: make partition scanning reliable

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



So far, loop-device partition scanning is skipped with EBUSY if
bd_mutex cannot be locked. This can happen if someone runs open() /
close() / ioctl() in parallel with LOOP_SET_FD/LOOP_CHANGE_FD and friends.

__fput() on open files might get delayed arbitrarily, which means that
blkdev_put() might get called at any point in time, locking bd_mutex and
thus preventing any partition rescan from running in parallel. This
basically requires user-space to *always* run BLKRRPART after
LOOP_SET_FD/LOOP_CHANGE_FD, as we cannot know whether someone else is about
to close their FD at the same time.

Fix this by running BLKRRPART only after lo_ctl_mutex has been released, so
that we can block on bd_mutex without risking a deadlock.

Signed-off-by: David Herrmann <dh.herrmann@xxxxxxxxx>
---
 block/ioctl.c        |  9 ++++++---
 drivers/block/loop.c | 49 ++++++++++++++++++++++++++++++-------------------
 include/linux/fs.h   |  1 +
 3 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/block/ioctl.c b/block/ioctl.c
index 6c7bf90..01c6550 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -150,7 +150,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 	}
 }
 
-static int blkdev_reread_part(struct block_device *bdev)
+int blkdev_reread_part(struct block_device *bdev, int skipbusy)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	int res;
@@ -159,12 +159,15 @@ static int blkdev_reread_part(struct block_device *bdev)
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	if (!mutex_trylock(&bdev->bd_mutex))
+	if (!skipbusy)
+		mutex_lock(&bdev->bd_mutex);
+	else if (!mutex_trylock(&bdev->bd_mutex))
 		return -EBUSY;
 	res = rescan_partitions(disk, bdev);
 	mutex_unlock(&bdev->bd_mutex);
 	return res;
 }
+EXPORT_SYMBOL(blkdev_reread_part);
 
 static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 			     uint64_t len, int secure)
@@ -407,7 +410,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
 		break;
 	case BLKRRPART:
-		ret = blkdev_reread_part(bdev);
+		ret = blkdev_reread_part(bdev, 1);
 		break;
 	case BLKGETSIZE:
 		size = i_size_read(bdev->bd_inode);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6cb1beb..4047985 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -637,7 +637,7 @@ out:
  * new backing store is the same size and type as the old backing store.
  */
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
-			  unsigned int arg)
+			  unsigned int arg, int *rrpart)
 {
 	struct file	*file, *old_file;
 	struct inode	*inode;
@@ -676,7 +676,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
 	fput(old_file);
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		*rrpart = 1;
 	return 0;
 
  out_putf:
@@ -821,7 +821,7 @@ static void loop_config_discard(struct loop_device *lo)
 }
 
 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
-		       struct block_device *bdev, unsigned int arg)
+		       struct block_device *bdev, unsigned int arg, int *rrpart)
 {
 	struct file	*file, *f;
 	struct inode	*inode;
@@ -917,7 +917,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		*rrpart = 1;
 
 	/* Grab the block_device to prevent its destruction after we
 	 * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
@@ -1064,7 +1064,8 @@ static int loop_clr_fd(struct loop_device *lo)
 }
 
 static int
-loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
+loop_set_status(struct loop_device *lo, const struct loop_info64 *info,
+		int *rrpart)
 {
 	int err;
 	struct loop_func_table *xfer;
@@ -1123,7 +1124,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	     !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
-		ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+		*rrpart = 1;
 	}
 
 	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
@@ -1223,7 +1224,8 @@ loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
 }
 
 static int
-loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
+loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg,
+		    int *rrpart)
 {
 	struct loop_info info;
 	struct loop_info64 info64;
@@ -1231,17 +1233,18 @@ loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
 	if (copy_from_user(&info, arg, sizeof (struct loop_info)))
 		return -EFAULT;
 	loop_info64_from_old(&info, &info64);
-	return loop_set_status(lo, &info64);
+	return loop_set_status(lo, &info64, rrpart);
 }
 
 static int
-loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
+loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg,
+		  int *rrpart)
 {
 	struct loop_info64 info64;
 
 	if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
 		return -EFAULT;
-	return loop_set_status(lo, &info64);
+	return loop_set_status(lo, &info64, rrpart);
 }
 
 static int
@@ -1289,15 +1292,15 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	unsigned int cmd, unsigned long arg)
 {
 	struct loop_device *lo = bdev->bd_disk->private_data;
-	int err;
+	int err, rrpart = 0;
 
 	mutex_lock_nested(&lo->lo_ctl_mutex, 1);
 	switch (cmd) {
 	case LOOP_SET_FD:
-		err = loop_set_fd(lo, mode, bdev, arg);
+		err = loop_set_fd(lo, mode, bdev, arg, &rrpart);
 		break;
 	case LOOP_CHANGE_FD:
-		err = loop_change_fd(lo, bdev, arg);
+		err = loop_change_fd(lo, bdev, arg, &rrpart);
 		break;
 	case LOOP_CLR_FD:
 		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
@@ -1309,7 +1312,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 		err = -EPERM;
 		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
 			err = loop_set_status_old(lo,
-					(struct loop_info __user *)arg);
+					(struct loop_info __user *)arg,
+					&rrpart);
 		break;
 	case LOOP_GET_STATUS:
 		err = loop_get_status_old(lo, (struct loop_info __user *) arg);
@@ -1318,7 +1322,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 		err = -EPERM;
 		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
 			err = loop_set_status64(lo,
-					(struct loop_info64 __user *) arg);
+					(struct loop_info64 __user *) arg,
+					&rrpart);
 		break;
 	case LOOP_GET_STATUS64:
 		err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
@@ -1334,6 +1339,9 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	mutex_unlock(&lo->lo_ctl_mutex);
 
 out_unlocked:
+	if (rrpart)
+		blkdev_reread_part(bdev, 0);
+
 	return err;
 }
 
@@ -1429,7 +1437,7 @@ loop_info64_to_compat(const struct loop_info64 *info64,
 
 static int
 loop_set_status_compat(struct loop_device *lo,
-		       const struct compat_loop_info __user *arg)
+		       const struct compat_loop_info __user *arg, int *rrpart)
 {
 	struct loop_info64 info64;
 	int ret;
@@ -1437,7 +1445,7 @@ loop_set_status_compat(struct loop_device *lo,
 	ret = loop_info64_from_compat(arg, &info64);
 	if (ret < 0)
 		return ret;
-	return loop_set_status(lo, &info64);
+	return loop_set_status(lo, &info64, rrpart);
 }
 
 static int
@@ -1460,14 +1468,17 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 			   unsigned int cmd, unsigned long arg)
 {
 	struct loop_device *lo = bdev->bd_disk->private_data;
-	int err;
+	int err, rrpart = 0;
 
 	switch(cmd) {
 	case LOOP_SET_STATUS:
 		mutex_lock(&lo->lo_ctl_mutex);
 		err = loop_set_status_compat(
-			lo, (const struct compat_loop_info __user *) arg);
+			lo, (const struct compat_loop_info __user *) arg,
+			&rrpart);
 		mutex_unlock(&lo->lo_ctl_mutex);
+		if (rrpart)
+			blkdev_reread_part(bdev, 0);
 		break;
 	case LOOP_GET_STATUS:
 		mutex_lock(&lo->lo_ctl_mutex);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ab779e..755a056 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2153,6 +2153,7 @@ extern const struct file_operations bad_sock_fops;
 #ifdef CONFIG_BLOCK
 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
+extern int blkdev_reread_part(struct block_device *bdev, int skipbusy);
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
 extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
 extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
-- 
2.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux