On Tue, Jun 20, 2023 at 11:31 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > Commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") introduce a > new lock 'delete_mutex', and trigger a new deadlock: > > t1: remove rdev t2: sysfs writer > > rdev_attr_store rdev_attr_store > mddev_lock > state_store > md_kick_rdev_from_array > lock delete_mutex > list_add mddev->deleting > unlock delete_mutex > mddev_unlock > mddev_lock > ... > lock delete_mutex > kobject_del > // wait for sysfs writers to be done > mddev_unlock > lock delete_mutex > // wait for delete_mutex, deadlock > > 'delete_mutex' is used to protect the list 'mddev->deleting', turns out > that this list can be protected by 'reconfig_mutex' directly, and this > lock can be removed. > > Fix this problem by removing the lock, and use 'reconfig_mutex' to > protect the list. mddev_unlock() will move this list to a local list to > be handled after 'reconfig_mutex' is dropped. > > Fixes: 3ce94ce5d05a ("md: fix duplicate filename for rdev") > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> Applied to md-next. Thanks for the quick fix! Song > --- > drivers/md/md.c | 28 +++++++++------------------- > drivers/md/md.h | 4 +--- > 2 files changed, 10 insertions(+), 22 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 1086d7282ee7..089f7d7a9052 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev) > { > mutex_init(&mddev->open_mutex); > mutex_init(&mddev->reconfig_mutex); > - mutex_init(&mddev->delete_mutex); > mutex_init(&mddev->sync_mutex); > mutex_init(&mddev->bitmap_info.mutex); > INIT_LIST_HEAD(&mddev->disks); > @@ -751,26 +750,15 @@ static void mddev_free(struct mddev *mddev) > > static const struct attribute_group md_redundancy_group; > > -static void md_free_rdev(struct mddev *mddev) > +void mddev_unlock(struct mddev *mddev) > { > struct md_rdev *rdev; > struct md_rdev *tmp; > + LIST_HEAD(delete); > > - mutex_lock(&mddev->delete_mutex); > - if (list_empty(&mddev->deleting)) > - goto out; > + if (!list_empty(&mddev->deleting)) > + list_splice_init(&mddev->deleting, &delete); > > - list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) { > - list_del_init(&rdev->same_set); > - kobject_del(&rdev->kobj); > - export_rdev(rdev, mddev); > - } > -out: > - mutex_unlock(&mddev->delete_mutex); > -} > - > -void mddev_unlock(struct mddev *mddev) > -{ > if (mddev->to_remove) { > /* These cannot be removed under reconfig_mutex as > * an access to the files will try to take reconfig_mutex > @@ -810,7 +798,11 @@ void mddev_unlock(struct mddev *mddev) > } else > mutex_unlock(&mddev->reconfig_mutex); > > - md_free_rdev(mddev); > + list_for_each_entry_safe(rdev, tmp, &delete, same_set) { > + list_del_init(&rdev->same_set); > + kobject_del(&rdev->kobj); > + export_rdev(rdev, mddev); > + } > > md_wakeup_thread(mddev->thread); > wake_up(&mddev->sb_wait); > @@ -2490,9 +2482,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) > * reconfig_mutex is held, hence it can't be called under > * reconfig_mutex and it's delayed to mddev_unlock(). > */ > - mutex_lock(&mddev->delete_mutex); > list_add(&rdev->same_set, &mddev->deleting); > - mutex_unlock(&mddev->delete_mutex); > } > > static void export_array(struct mddev *mddev) > diff --git a/drivers/md/md.h b/drivers/md/md.h > index 892a598a5029..8ae957480976 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -531,11 +531,9 @@ struct mddev { > > /* > * Temporarily store rdev that will be finally removed when > - * reconfig_mutex is unlocked. > + * reconfig_mutex is unlocked, protected by reconfig_mutex. > */ > struct list_head deleting; > - /* Protect the deleting list */ > - struct mutex delete_mutex; > > /* Used to synchronize idle and frozen for action_store() */ > struct mutex sync_mutex; > -- > 2.39.2 >