[PATCH 2/2] md: create symlink with disk holder after mddev resume

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Li Nan <linan122@xxxxxxxxxx>

There is a risk of deadlock when a process gets disk->open_mutex after
suspending mddev, because other processes may hold open_mutex while
submitting io. For example:

  T1				T2
  blkdev_open
   bdev_open_by_dev
    mutex_lock(&disk->open_mutex)
  				md_ioctl
  				 mddev_suspend_and_lock
  				  mddev_suspend
  				 md_add_new_disk
  				  bind_rdev_to_array
  				   bd_link_disk_holder
  				    //wait open_mutex
    blkdev_get_whole
     bdev_disk_changed
      efi_partition
       read_lba
        ...
         md_submit_bio
          md_handle_request
           //wait resume

Fix it by getting disk->open_mutex after mddev resume, iterating each
mddev->disk to create symlink for rdev which has not been created yet.
and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been
deleted from mddev->disks here, which can avoid concurrent bind and unbind,

Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls involed array reconfiguration")
Signed-off-by: Li Nan <linan122@xxxxxxxxxx>
---
 drivers/md/md.c | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index d6612b922c76..c128570f2a5d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(mddev_resume);
 
+static void md_link_disk_holder(struct mddev *mddev)
+{
+	struct md_rdev *rdev;
+
+	rcu_read_lock();
+	rdev_for_each_rcu(rdev, mddev) {
+		if (test_bit(SymlinkCreated, &rdev->flags))
+			continue;
+		if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
+			set_bit(SymlinkCreated, &rdev->flags);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Generic flush handling for md
  */
@@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev)
 
 	list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
 		list_del_init(&rdev->same_set);
+		if (test_bit(SymlinkCreated, &rdev->flags)) {
+			bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
+			clear_bit(SymlinkCreated, &rdev->flags);
+		}
+		rdev->mddev = NULL;
 		kobject_del(&rdev->kobj);
 		export_rdev(rdev, mddev);
 	}
@@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 		sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
 
 	list_add_rcu(&rdev->same_set, &mddev->disks);
-	if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
-		set_bit(SymlinkCreated, &rdev->flags);
 
 	/* May as well allow recovery to be retried once */
 	mddev->recovery_disabled++;
@@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
 {
 	struct mddev *mddev = rdev->mddev;
 
-	if (test_bit(SymlinkCreated, &rdev->flags)) {
-		bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
-		clear_bit(SymlinkCreated, &rdev->flags);
-	}
 	list_del_rcu(&rdev->same_set);
 	pr_debug("md: unbind<%pg>\n", rdev->bdev);
 	mddev_destroy_serial_pool(rdev->mddev, rdev);
-	rdev->mddev = NULL;
 	sysfs_remove_link(&rdev->kobj, "block");
 	sysfs_put(rdev->sysfs_state);
 	sysfs_put(rdev->sysfs_unack_badblocks);
@@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 	if (err)
 		export_rdev(rdev, mddev);
 	mddev_unlock_and_resume(mddev);
-	if (!err)
+	if (!err) {
+		md_link_disk_holder(mddev);
 		md_new_event();
+	}
 	return err ? err : len;
 }
 
@@ -6606,6 +6620,7 @@ static void autorun_devices(int part)
 			}
 			autorun_array(mddev);
 			mddev_unlock_and_resume(mddev);
+			md_link_disk_holder(mddev);
 		}
 		/* on success, candidates will be empty, on error
 		 * it won't...
@@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
 	    err != -EINVAL)
 		mddev->hold_active = 0;
 
-	md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) :
-				     mddev_unlock(mddev);
+	if (md_ioctl_need_suspend(cmd)) {
+		mddev_unlock_and_resume(mddev);
+		md_link_disk_holder(mddev);
+	} else {
+		mddev_unlock(mddev);
+	}
 
 out:
 	if(did_set_md_closing)
-- 
2.39.2





[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux