On Fri 18-08-23 16:00:51, Christian Brauner wrote: > Recent rework moved block device closing out of sb->put_super() and into > sb->kill_sb() to avoid deadlocks as s_umount is held in put_super() and > blkdev_put() can end up taking s_umount again. > > That means we need to move the removal of the superblock from @fs_supers > out of generic_shutdown_super() and into deactivate_locked_super() to > ensure that concurrent mounters don't fail to open block devices that > are still in use because blkdev_put() in sb->kill_sb() hasn't been > called yet. > > We can now do this as we can make iterators through @fs_super and > @super_blocks wait without holding s_umount. Concurrent mounts will wait > until a dying superblock is fully dead so until sb->kill_sb() has been > called and SB_DEAD been set. Concurrent iterators can already discard > any SB_DYING superblock. > > Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> Looks good. Feel free to add: Reviewed-by: Jan Kara <jack@xxxxxxx> Honza > --- > fs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++------ > include/linux/fs.h | 1 + > 2 files changed, 65 insertions(+), 7 deletions(-) > > diff --git a/fs/super.c b/fs/super.c > index 896f05f34377..015e428818ce 100644 > --- a/fs/super.c > +++ b/fs/super.c > @@ -153,7 +153,7 @@ static inline bool super_lock_excl(struct super_block *sb) > } > > /* wake waiters */ > -#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING) > +#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD) > static void super_wake(struct super_block *sb, unsigned int flag) > { > WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS)); > @@ -457,6 +457,25 @@ void deactivate_locked_super(struct super_block *s) > list_lru_destroy(&s->s_dentry_lru); > list_lru_destroy(&s->s_inode_lru); > > + /* > + * Remove it from @fs_supers so it isn't found by new > + * sget{_fc}() walkers anymore. Any concurrent mounter still > + * managing to grab a temporary reference is guaranteed to > + * already see SB_DYING and will wait until we notify them about > + * SB_DEAD. > + */ > + spin_lock(&sb_lock); > + hlist_del_init(&s->s_instances); > + spin_unlock(&sb_lock); > + > + /* > + * Let concurrent mounts know that this thing is really dead. > + * We don't need @sb->s_umount here as every concurrent caller > + * will see SB_DYING and either discard the superblock or wait > + * for SB_DEAD. > + */ > + super_wake(s, SB_DEAD); > + > put_filesystem(fs); > put_super(s); > } else { > @@ -513,6 +532,45 @@ static int grab_super(struct super_block *s) __releases(sb_lock) > return 0; > } > > +static inline bool wait_dead(struct super_block *sb) > +{ > + unsigned int flags; > + > + /* > + * Pairs with memory barrier in super_wake() and ensures > + * that we see SB_DEAD after we're woken. > + */ > + flags = smp_load_acquire(&sb->s_flags); > + return flags & SB_DEAD; > +} > + > +/** > + * grab_super_dead - acquire an active reference to a superblock > + * @sb: superblock to acquire > + * > + * Acquire a temporary reference on a superblock and try to trade it for > + * an active reference. This is used in sget{_fc}() to wait for a > + * superblock to either become SB_BORN or for it to pass through > + * sb->kill() and be marked as SB_DEAD. > + * > + * Return: This returns true if an active reference could be acquired, > + * false if not. > + */ > +static bool grab_super_dead(struct super_block *sb) > +{ > + > + sb->s_count++; > + if (grab_super(sb)) { > + put_super(sb); > + lockdep_assert_held(&sb->s_umount); > + return true; > + } > + wait_var_event(&sb->s_flags, wait_dead(sb)); > + put_super(sb); > + lockdep_assert_not_held(&sb->s_umount); > + return false; > +} > + > /* > * super_trylock_shared - try to grab ->s_umount shared > * @sb: reference we are trying to grab > @@ -639,15 +697,14 @@ void generic_shutdown_super(struct super_block *sb) > spin_unlock(&sb->s_inode_list_lock); > } > } > - spin_lock(&sb_lock); > - /* should be initialized for __put_super_and_need_restart() */ > - hlist_del_init(&sb->s_instances); > - spin_unlock(&sb_lock); > /* > * Broadcast to everyone that grabbed a temporary reference to this > * superblock before we removed it from @fs_supers that the superblock > * is dying. Every walker of @fs_supers outside of sget{_fc}() will now > * discard this superblock and treat it as dead. > + * > + * We leave the superblock on @fs_supers so it can be found by > + * sget{_fc}() until we passed sb->kill_sb(). > */ > super_wake(sb, SB_DYING); > super_unlock_excl(sb); > @@ -742,7 +799,7 @@ struct super_block *sget_fc(struct fs_context *fc, > destroy_unused_super(s); > return ERR_PTR(-EBUSY); > } > - if (!grab_super(old)) > + if (!grab_super_dead(old)) > goto retry; > destroy_unused_super(s); > return old; > @@ -786,7 +843,7 @@ struct super_block *sget(struct file_system_type *type, > destroy_unused_super(s); > return ERR_PTR(-EBUSY); > } > - if (!grab_super(old)) > + if (!grab_super_dead(old)) > goto retry; > destroy_unused_super(s); > return old; > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 173672645156..a63da68305e9 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1095,6 +1095,7 @@ extern int send_sigurg(struct fown_struct *fown); > #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ > > /* These sb flags are internal to the kernel */ > +#define SB_DEAD BIT(21) > #define SB_DYING BIT(24) > #define SB_SUBMOUNT BIT(26) > #define SB_FORCE BIT(27) > > -- > 2.34.1 > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR