From: Li Nan <linan122@xxxxxxxxxx> There is a risk of deadlock when a process gets disk->open_mutex after suspending mddev, because other processes may hold open_mutex while submitting io. For example: T1 T2 blkdev_open bdev_open_by_dev mutex_lock(&disk->open_mutex) md_ioctl mddev_suspend_and_lock mddev_suspend md_add_new_disk bind_rdev_to_array bd_link_disk_holder //wait open_mutex blkdev_get_whole bdev_disk_changed efi_partition read_lba ... md_submit_bio md_handle_request //wait resume Fix it by getting disk->open_mutex after mddev resume, iterating each mddev->disk to create symlink for rdev which has not been created yet. and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been deleted from mddev->disks here, which can avoid concurrent bind and unbind, Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls involed array reconfiguration") Signed-off-by: Li Nan <linan122@xxxxxxxxxx> --- drivers/md/md.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index d6612b922c76..c128570f2a5d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev) } EXPORT_SYMBOL_GPL(mddev_resume); +static void md_link_disk_holder(struct mddev *mddev) +{ + struct md_rdev *rdev; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (test_bit(SymlinkCreated, &rdev->flags)) + continue; + if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) + set_bit(SymlinkCreated, &rdev->flags); + } + rcu_read_unlock(); +} + /* * Generic flush handling for md */ @@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev) list_for_each_entry_safe(rdev, tmp, &delete, same_set) { list_del_init(&rdev->same_set); + if (test_bit(SymlinkCreated, &rdev->flags)) { + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); + clear_bit(SymlinkCreated, &rdev->flags); + } + rdev->mddev = NULL; kobject_del(&rdev->kobj); export_rdev(rdev, mddev); } @@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks"); list_add_rcu(&rdev->same_set, &mddev->disks); - if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) - set_bit(SymlinkCreated, &rdev->flags); /* May as well allow recovery to be retried once */ mddev->recovery_disabled++; @@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) { struct mddev *mddev = rdev->mddev; - if (test_bit(SymlinkCreated, &rdev->flags)) { - bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); - clear_bit(SymlinkCreated, &rdev->flags); - } list_del_rcu(&rdev->same_set); pr_debug("md: unbind<%pg>\n", rdev->bdev); mddev_destroy_serial_pool(rdev->mddev, rdev); - rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); sysfs_put(rdev->sysfs_unack_badblocks); @@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) if (err) export_rdev(rdev, mddev); mddev_unlock_and_resume(mddev); - if (!err) + if (!err) { + md_link_disk_holder(mddev); md_new_event(); + } return err ? err : len; } @@ -6606,6 +6620,7 @@ static void autorun_devices(int part) } autorun_array(mddev); mddev_unlock_and_resume(mddev); + md_link_disk_holder(mddev); } /* on success, candidates will be empty, on error * it won't... @@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, err != -EINVAL) mddev->hold_active = 0; - md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) : - mddev_unlock(mddev); + if (md_ioctl_need_suspend(cmd)) { + mddev_unlock_and_resume(mddev); + md_link_disk_holder(mddev); + } else { + mddev_unlock(mddev); + } out: if(did_set_md_closing) -- 2.39.2