From: Ming Lei <ming.lei@xxxxxxxxxxxxx>

The lo_ctl_mutex is held while running all ioctl handlers, and some ioctl handlers call ioctl_by_bdev(BLKRRPART) to reread partitions, which requires bd_mutex. So it is easy to cause a failure, because trylock(bd_mutex) may fail inside blkdev_reread_part() under the following lock context:

blkid or other application:
	->open()
		->mutex_lock(bd_mutex)
		->lo_open()
			->mutex_lock(lo_ctl_mutex)

losetup(set fd ioctl):
	->mutex_lock(lo_ctl_mutex)
	->ioctl_by_bdev(BLKRRPART)
		->trylock(bd_mutex)

This patch tries to eliminate the ABBA lock dependency by no longer taking lo_ctl_mutex in lo_open(), with the following approach:

1) introduce lo_open_mutex to protect lo_refcnt and avoid acquiring lo_ctl_mutex in lo_open():
	- for open vs. add/del loop, there is no problem because of loop_index_mutex
	- lo_open_mutex is used for syncing open() and loop_clr_fd()
	- both open() and release() have been serialized by bd_mutex already

2) don't hold lo_ctl_mutex for decreasing/checking lo_refcnt in lo_release(), so lo_ctl_mutex is only required for the last release.

CC: Christoph Hellwig <hch@xxxxxxxxxxxxx> CC: Jens Axboe <axboe@xxxxxxxxx> CC: Tejun Heo <tj@xxxxxxxxxx> CC: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> CC: Markus Pargmann <mpa@xxxxxxxxxxxxxx> CC: Stefan Weinhuber <wein@xxxxxxxxxx> CC: Stefan Haberland <stefan.haberland@xxxxxxxxxx> CC: Sebastian Ott <sebott@xxxxxxxxxxxxxxxxxx> CC: Fabian Frederick <fabf@xxxxxxxxx> CC: Ming Lei <ming.lei@xxxxxxxxxxxxx> CC: David Herrmann <dh.herrmann@xxxxxxxxx> CC: Mike Galbraith <bitbucket@xxxxxxxxx> CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CC: nbd-general@xxxxxxxxxxxxxxxxxxxxx CC: linux-s390@xxxxxxxxxxxxxxx Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx> Signed-off-by: Jarod Wilson <jarod@xxxxxxxxxx> --- drivers/block/loop.c | 32 ++++++++++++++++++++++++++------ drivers/block/loop.h | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d1f168b..81a6bc1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -879,14 +879,18 @@ static int loop_clr_fd(struct loop_device *lo) * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d * command to fail with EBUSY. */ + mutex_lock(&lo->lo_open_mutex); if (lo->lo_refcnt > 1) { + mutex_unlock(&lo->lo_open_mutex); lo->lo_flags |= LO_FLAGS_AUTOCLEAR; mutex_unlock(&lo->lo_ctl_mutex); return 0; } - if (filp == NULL) + if (filp == NULL) { + mutex_unlock(&lo->lo_open_mutex); return -EINVAL; + } spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; @@ -919,6 +923,15 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. 
*/ module_put(THIS_MODULE); + + /* + * Unlock open_mutex for avoiding -EBUSY of rereading part: + * - try to acquire bd_mutex from reread part + * - another task is opening the loop with holding bd_mutex + * and trys to acquire open_mutex + */ + mutex_unlock(&lo->lo_open_mutex); + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) ioctl_by_bdev(bdev, BLKRRPART, 0); lo->lo_flags = 0; @@ -1376,9 +1389,9 @@ static int lo_open(struct block_device *bdev, fmode_t mode) goto out; } - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&lo->lo_open_mutex); lo->lo_refcnt++; - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&lo->lo_open_mutex); out: mutex_unlock(&loop_index_mutex); return err; @@ -1387,13 +1400,16 @@ out: static void lo_release(struct gendisk *disk, fmode_t mode) { struct loop_device *lo = disk->private_data; - int err; + int err, ref; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&lo->lo_open_mutex); + ref = --lo->lo_refcnt; + mutex_unlock(&lo->lo_open_mutex); - if (--lo->lo_refcnt) + if (ref) goto out; + mutex_lock(&lo->lo_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1646,6 +1662,7 @@ static int loop_add(struct loop_device **l, int i) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; mutex_init(&lo->lo_ctl_mutex); + mutex_init(&lo->lo_open_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; @@ -1763,11 +1780,14 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&lo->lo_ctl_mutex); break; } + mutex_lock(&lo->lo_open_mutex); if (lo->lo_refcnt > 0) { ret = -EBUSY; + mutex_unlock(&lo->lo_open_mutex); mutex_unlock(&lo->lo_ctl_mutex); break; } + mutex_unlock(&lo->lo_open_mutex); lo->lo_disk->private_data = NULL; mutex_unlock(&lo->lo_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 301c27f..1b4acf2 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -59,6 
+59,7 @@ struct loop_device { bool write_started; int lo_state; struct mutex lo_ctl_mutex; + struct mutex lo_open_mutex; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-s390" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html