RE: [PATCH V3 for-6.10/block] loop: Fix a race between loop detach and loop open

Gulam Mohamed <gulam.mohamed@xxxxxxxxxx> · Thu, 30 May 2024 16:28:23 +0000

Hi Kuai,

> -----Original Message-----
> From: Yu Kuai <yukuai1@xxxxxxxxxxxxxxx>
> Sent: Thursday, May 30, 2024 7:36 AM
> To: Gulam Mohamed <gulam.mohamed@xxxxxxxxxx>; linux-
> block@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx
> Cc: yukuai1@xxxxxxxxxxxxxxx; hch@xxxxxx; axboe@xxxxxxxxx; yukuai (C)
> <yukuai3@xxxxxxxxxx>
> Subject: Re: [PATCH V3 for-6.10/block] loop: Fix a race between loop detach
> and loop open
> 
> Hi,
> 
> 在 2024/05/30 4:02, Gulam Mohamed 写道:
> > 1. Userspace sends the command "losetup -d" which uses the open() call
> >     to open the device
> > 2. Kernel receives the ioctl command "LOOP_CLR_FD" which calls the
> >     function loop_clr_fd()
> > 3. If LOOP_CLR_FD is the first command received at the time, then the
> >     AUTOCLEAR flag is not set and deletion of the
> >     loop device proceeds ahead and scans the partitions (drop/add
> >     partitions)
> >
> >          if (disk_openers(lo->lo_disk) > 1) {
> >                  lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
> >                  loop_global_unlock(lo, true);
> >                  return 0;
> >          }
> >
> >   4. Before scanning partitions, it will check to see if any partition of
> >      the loop device is currently opened
> >   5. If any partition is opened, then it will return EBUSY:
> >
> >      if (disk->open_partitions)
> >                  return -EBUSY;
> >   6. So, after receiving the "LOOP_CLR_FD" command and just before the
> above
> >      check for open_partitions, if any other command
> >      (like blkid) opens any partition of the loop device, then the partition
> >      scan will not proceed and EBUSY is returned as shown in above code
> >   7. But in "__loop_clr_fd()", this EBUSY error is not propagated
> >   8. We have noticed that this is causing the partitions of the loop to
> >      remain stale even after the loop device is detached resulting in the
> >      IO errors on the partitions
> >
> > Fix:
> > Re-introduce the lo_open() call to restrict any process to open the
> > loop device when its being detached
> >
> > Test case involves the following two scripts:
> >
> > script1.sh:
> >
> > while [ 1 ];
> > do
> >          losetup -P -f /home/opt/looptest/test10.img
> >          blkid /dev/loop0p1
> > done
> >
> > script2.sh:
> >
> > while [ 1 ];
> > do
> >          losetup -d /dev/loop0
> > done
> >
> > Without fix, the following IO errors have been observed:
> >
> > kernel: __loop_clr_fd: partition scan of loop0 failed (rc=-16)
> > kernel: I/O error, dev loop0, sector 20971392 op 0x0:(READ) flags 0x80700
> >          phys_seg 1 prio class 0
> > kernel: I/O error, dev loop0, sector 108868 op 0x0:(READ) flags 0x0
> >          phys_seg 1 prio class 0
> > kernel: Buffer I/O error on dev loop0p1, logical block 27201, async page
> >          read
> >
> > Signed-off-by: Gulam Mohamed <gulam.mohamed@xxxxxxxxxx>
> > ---
> > v3<-v2:
> > Re-introduced the loop->lo_refcnt to take care of the case where we
> > race when the Lo_rundown is set after the lo_open() function releases
> > the lo_mutex lock
> >
> >   drivers/block/loop.c | 31 ++++++++++++++++++++++++++-----
> >   1 file changed, 26 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/block/loop.c b/drivers/block/loop.c index
> > 28a95fd366fe..60f61bf8dbd1 100644
> > --- a/drivers/block/loop.c
> > +++ b/drivers/block/loop.c
> > @@ -49,6 +49,7 @@ struct loop_func_table;
> >
> >   struct loop_device {
> >   	int		lo_number;
> > +	atomic_t        lo_refcnt;
> >   	loff_t		lo_offset;
> >   	loff_t		lo_sizelimit;
> >   	int		lo_flags;
> > @@ -1242,7 +1243,7 @@ static int loop_clr_fd(struct loop_device *lo)
> >   	 * <dev>/do something like mkfs/losetup -d <dev> causing the losetup
> -d
> >   	 * command to fail with EBUSY.
> >   	 */
> > -	if (disk_openers(lo->lo_disk) > 1) {
> > +	if (atomic_read(&lo->lo_refcnt) > 1) {
> >   		lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
> >   		loop_global_unlock(lo, true);
> >   		return 0;
> > @@ -1717,14 +1718,31 @@ static int lo_compat_ioctl(struct block_device
> *bdev, blk_mode_t mode,
> >   }
> >   #endif
> >
> > -static void lo_release(struct gendisk *disk)
> > +static int lo_open(struct gendisk *disk, blk_mode_t mode)
> >   {
> >   	struct loop_device *lo = disk->private_data;
> > +	int err;
> >
> > -	if (disk_openers(disk) > 0)
> > -		return;
> > +	err = mutex_lock_killable(&lo->lo_mutex);
> > +	if (err)
> > +		return err;
> > +
> > +	if (lo->lo_state == Lo_deleting || lo->lo_state == Lo_rundown)
> > +		err = -ENXIO;
> > +	else
> > +		atomic_inc(&lo->lo_refcnt);
> > +	mutex_unlock(&lo->lo_mutex);
> > +	return err;
> > +}
> > +
> > +static void lo_release(struct gendisk *disk) {
> > +	struct loop_device *lo = disk->private_data;
> >
> >   	mutex_lock(&lo->lo_mutex);
> > +	if (atomic_dec_return(&lo->lo_refcnt))
> > +		goto out_unlock;
> 
> So, both add, dec and test are inside the lo_mutex, then there is no need to
> use atomic value.
Yes, you are correct. Will change this to "int" in next version.

Thanks & Regards,
Gulam Mohamed.
> 
> Thanks,
> Kuai
> > +
> >   	if (lo->lo_state == Lo_bound && (lo->lo_flags &
> LO_FLAGS_AUTOCLEAR)) {
> >   		lo->lo_state = Lo_rundown;
> >   		mutex_unlock(&lo->lo_mutex);
> > @@ -1735,6 +1753,7 @@ static void lo_release(struct gendisk *disk)
> >   		__loop_clr_fd(lo, true);
> >   		return;
> >   	}
> > +out_unlock:
> >   	mutex_unlock(&lo->lo_mutex);
> >   }
> >
> > @@ -1752,6 +1771,7 @@ static void lo_free_disk(struct gendisk *disk)
> >
> >   static const struct block_device_operations lo_fops = {
> >   	.owner =	THIS_MODULE,
> > +	.open =         lo_open,
> >   	.release =	lo_release,
> >   	.ioctl =	lo_ioctl,
> >   #ifdef CONFIG_COMPAT
> > @@ -2064,6 +2084,7 @@ static int loop_add(int i)
> >   	 */
> >   	if (!part_shift)
> >   		set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
> > +	atomic_set(&lo->lo_refcnt, 0);
> >   	mutex_init(&lo->lo_mutex);
> >   	lo->lo_number		= i;
> >   	spin_lock_init(&lo->lo_lock);
> > @@ -2158,7 +2179,7 @@ static int loop_control_remove(int idx)
> >   	ret = mutex_lock_killable(&lo->lo_mutex);
> >   	if (ret)
> >   		goto mark_visible;
> > -	if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
> > +	if (lo->lo_state != Lo_unbound || atomic_read(&lo->lo_refcnt) > 0) {
> >   		mutex_unlock(&lo->lo_mutex);
> >   		ret = -EBUSY;
> >   		goto mark_visible;
> >