Re: [PATCH V3 for-6.10/block] loop: Fix a race between loop detach and loop open

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

在 2024/05/30 4:02, Gulam Mohamed 写道:
1. Userspace sends the command "losetup -d" which uses the open() call
    to open the device
2. Kernel receives the ioctl command "LOOP_CLR_FD" which calls the
    function loop_clr_fd()
3. If LOOP_CLR_FD is the first command received at the time, then the
    AUTOCLEAR flag is not set and deletion of the
    loop device proceeds ahead and scans the partitions (drop/add
    partitions)

         if (disk_openers(lo->lo_disk) > 1) {
                 lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
                 loop_global_unlock(lo, true);
                 return 0;
         }

  4. Before scanning partitions, it will check to see if any partition of
     the loop device is currently opened
  5. If any partition is opened, then it will return EBUSY:

     if (disk->open_partitions)
                 return -EBUSY;
  6. So, after receiving the "LOOP_CLR_FD" command and just before the above
     check for open_partitions, if any other command
     (like blkid) opens any partition of the loop device, then the partition
     scan will not proceed and EBUSY is returned as shown in above code
  7. But in "__loop_clr_fd()", this EBUSY error is not propagated
  8. We have noticed that this is causing the partitions of the loop to
     remain stale even after the loop device is detached resulting in the
     IO errors on the partitions

Fix:
Re-introduce the lo_open() call to restrict any process to open the loop
device when its being detached

Test case involves the following two scripts:

script1.sh:

while [ 1 ];
do
         losetup -P -f /home/opt/looptest/test10.img
         blkid /dev/loop0p1
done

script2.sh:

while [ 1 ];
do
         losetup -d /dev/loop0
done

Without fix, the following IO errors have been observed:

kernel: __loop_clr_fd: partition scan of loop0 failed (rc=-16)
kernel: I/O error, dev loop0, sector 20971392 op 0x0:(READ) flags 0x80700
         phys_seg 1 prio class 0
kernel: I/O error, dev loop0, sector 108868 op 0x0:(READ) flags 0x0
         phys_seg 1 prio class 0
kernel: Buffer I/O error on dev loop0p1, logical block 27201, async page
         read

Signed-off-by: Gulam Mohamed <gulam.mohamed@xxxxxxxxxx>
---
v3<-v2:
Re-introduced the loop->lo_refcnt to take care of the case where we race
when the Lo_rundown is set after the lo_open() function releases the
lo_mutex lock

  drivers/block/loop.c | 31 ++++++++++++++++++++++++++-----
  1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 28a95fd366fe..60f61bf8dbd1 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -49,6 +49,7 @@ struct loop_func_table;
struct loop_device {
  	int		lo_number;
+	atomic_t        lo_refcnt;
  	loff_t		lo_offset;
  	loff_t		lo_sizelimit;
  	int		lo_flags;
@@ -1242,7 +1243,7 @@ static int loop_clr_fd(struct loop_device *lo)
  	 * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
  	 * command to fail with EBUSY.
  	 */
-	if (disk_openers(lo->lo_disk) > 1) {
+	if (atomic_read(&lo->lo_refcnt) > 1) {
  		lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
  		loop_global_unlock(lo, true);
  		return 0;
@@ -1717,14 +1718,31 @@ static int lo_compat_ioctl(struct block_device *bdev, blk_mode_t mode,
  }
  #endif
-static void lo_release(struct gendisk *disk)
+static int lo_open(struct gendisk *disk, blk_mode_t mode)
  {
  	struct loop_device *lo = disk->private_data;
+	int err;
- if (disk_openers(disk) > 0)
-		return;
+	err = mutex_lock_killable(&lo->lo_mutex);
+	if (err)
+		return err;
+
+	if (lo->lo_state == Lo_deleting || lo->lo_state == Lo_rundown)
+		err = -ENXIO;
+	else
+		atomic_inc(&lo->lo_refcnt);
+	mutex_unlock(&lo->lo_mutex);
+	return err;
+}
+
+static void lo_release(struct gendisk *disk)
+{
+	struct loop_device *lo = disk->private_data;
mutex_lock(&lo->lo_mutex);
+	if (atomic_dec_return(&lo->lo_refcnt))
+		goto out_unlock;

So, both add, dec and test are inside the lo_mutex, then there is no
need to use atomic value.

Thanks,
Kuai
+
  	if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
  		lo->lo_state = Lo_rundown;
  		mutex_unlock(&lo->lo_mutex);
@@ -1735,6 +1753,7 @@ static void lo_release(struct gendisk *disk)
  		__loop_clr_fd(lo, true);
  		return;
  	}
+out_unlock:
  	mutex_unlock(&lo->lo_mutex);
  }
@@ -1752,6 +1771,7 @@ static void lo_free_disk(struct gendisk *disk) static const struct block_device_operations lo_fops = {
  	.owner =	THIS_MODULE,
+	.open =         lo_open,
  	.release =	lo_release,
  	.ioctl =	lo_ioctl,
  #ifdef CONFIG_COMPAT
@@ -2064,6 +2084,7 @@ static int loop_add(int i)
  	 */
  	if (!part_shift)
  		set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
+	atomic_set(&lo->lo_refcnt, 0);
  	mutex_init(&lo->lo_mutex);
  	lo->lo_number		= i;
  	spin_lock_init(&lo->lo_lock);
@@ -2158,7 +2179,7 @@ static int loop_control_remove(int idx)
  	ret = mutex_lock_killable(&lo->lo_mutex);
  	if (ret)
  		goto mark_visible;
-	if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
+	if (lo->lo_state != Lo_unbound || atomic_read(&lo->lo_refcnt) > 0) {
  		mutex_unlock(&lo->lo_mutex);
  		ret = -EBUSY;
  		goto mark_visible;






[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux