Re: [PATCH v3 4/4] super: wait until we passed kill super

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri 18-08-23 16:00:51, Christian Brauner wrote:
> Recent rework moved block device closing out of sb->put_super() and into
> sb->kill_sb() to avoid deadlocks as s_umount is held in put_super() and
> blkdev_put() can end up taking s_umount again.
> 
> That means we need to move the removal of the superblock from @fs_supers
> out of generic_shutdown_super() and into deactivate_locked_super() to
> ensure that concurrent mounters don't fail to open block devices that
> are still in use because blkdev_put() in sb->kill_sb() hasn't been
> called yet.
> 
> We can now do this as we can make iterators through @fs_supers and
> @super_blocks wait without holding s_umount. Concurrent mounts will wait
> until a dying superblock is fully dead, i.e., until sb->kill_sb() has been
> called and SB_DEAD has been set. Concurrent iterators can already discard
> any SB_DYING superblock.
> 
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@xxxxxxx>

								Honza

> ---
>  fs/super.c         | 71 ++++++++++++++++++++++++++++++++++++++++++++++++------
>  include/linux/fs.h |  1 +
>  2 files changed, 65 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/super.c b/fs/super.c
> index 896f05f34377..015e428818ce 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -153,7 +153,7 @@ static inline bool super_lock_excl(struct super_block *sb)
>  }
>  
>  /* wake waiters */
> -#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
> +#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
>  static void super_wake(struct super_block *sb, unsigned int flag)
>  {
>  	WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
> @@ -457,6 +457,25 @@ void deactivate_locked_super(struct super_block *s)
>  		list_lru_destroy(&s->s_dentry_lru);
>  		list_lru_destroy(&s->s_inode_lru);
>  
> +		/*
> +		 * Remove it from @fs_supers so it isn't found by new
> +		 * sget{_fc}() walkers anymore. Any concurrent mounter still
> +		 * managing to grab a temporary reference is guaranteed to
> +		 * already see SB_DYING and will wait until we notify them about
> +		 * SB_DEAD.
> +		 */
> +		spin_lock(&sb_lock);
> +		hlist_del_init(&s->s_instances);
> +		spin_unlock(&sb_lock);
> +
> +		/*
> +		 * Let concurrent mounts know that this thing is really dead.
> +		 * We don't need @sb->s_umount here as every concurrent caller
> +		 * will see SB_DYING and either discard the superblock or wait
> +		 * for SB_DEAD.
> +		 */
> +		super_wake(s, SB_DEAD);
> +
>  		put_filesystem(fs);
>  		put_super(s);
>  	} else {
> @@ -513,6 +532,45 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
>  	return 0;
>  }
>  
> +static inline bool wait_dead(struct super_block *sb)
> +{
> +	unsigned int flags;
> +
> +	/*
> +	 * Pairs with memory barrier in super_wake() and ensures
> +	 * that we see SB_DEAD after we're woken.
> +	 */
> +	flags = smp_load_acquire(&sb->s_flags);
> +	return flags & SB_DEAD;
> +}
> +
> +/**
> + * grab_super_dead - acquire an active reference to a superblock
> + * @sb: superblock to acquire
> + *
> + * Acquire a temporary reference on a superblock and try to trade it for
> + * an active reference. This is used in sget{_fc}() to wait for a
> + * superblock to either become SB_BORN or for it to pass through
> + * sb->kill_sb() and be marked as SB_DEAD.
> + *
> + * Return: This returns true if an active reference could be acquired,
> + *         false if not.
> + */
> +static bool grab_super_dead(struct super_block *sb)
> +{
> +
> +	sb->s_count++;
> +	if (grab_super(sb)) {
> +		put_super(sb);
> +		lockdep_assert_held(&sb->s_umount);
> +		return true;
> +	}
> +	wait_var_event(&sb->s_flags, wait_dead(sb));
> +	put_super(sb);
> +	lockdep_assert_not_held(&sb->s_umount);
> +	return false;
> +}
> +
>  /*
>   *	super_trylock_shared - try to grab ->s_umount shared
>   *	@sb: reference we are trying to grab
> @@ -639,15 +697,14 @@ void generic_shutdown_super(struct super_block *sb)
>  			spin_unlock(&sb->s_inode_list_lock);
>  		}
>  	}
> -	spin_lock(&sb_lock);
> -	/* should be initialized for __put_super_and_need_restart() */
> -	hlist_del_init(&sb->s_instances);
> -	spin_unlock(&sb_lock);
>  	/*
>  	 * Broadcast to everyone that grabbed a temporary reference to this
>  	 * superblock before we removed it from @fs_supers that the superblock
>  	 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
>  	 * discard this superblock and treat it as dead.
> +	 *
> +	 * We leave the superblock on @fs_supers so it can be found by
> +	 * sget{_fc}() until we passed sb->kill_sb().
>  	 */
>  	super_wake(sb, SB_DYING);
>  	super_unlock_excl(sb);
> @@ -742,7 +799,7 @@ struct super_block *sget_fc(struct fs_context *fc,
>  		destroy_unused_super(s);
>  		return ERR_PTR(-EBUSY);
>  	}
> -	if (!grab_super(old))
> +	if (!grab_super_dead(old))
>  		goto retry;
>  	destroy_unused_super(s);
>  	return old;
> @@ -786,7 +843,7 @@ struct super_block *sget(struct file_system_type *type,
>  				destroy_unused_super(s);
>  				return ERR_PTR(-EBUSY);
>  			}
> -			if (!grab_super(old))
> +			if (!grab_super_dead(old))
>  				goto retry;
>  			destroy_unused_super(s);
>  			return old;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 173672645156..a63da68305e9 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1095,6 +1095,7 @@ extern int send_sigurg(struct fown_struct *fown);
>  #define SB_LAZYTIME     BIT(25)	/* Update the on-disk [acm]times lazily */
>  
>  /* These sb flags are internal to the kernel */
> +#define SB_DEAD         BIT(21)
>  #define SB_DYING        BIT(24)
>  #define SB_SUBMOUNT     BIT(26)
>  #define SB_FORCE        BIT(27)
> 
> -- 
> 2.34.1
> 
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux