Re: [PATCH v2 05/10] bdev: implement freeze and thaw holder operations

Jan Kara <jack@xxxxxxx> · Wed, 25 Oct 2023 16:01:05 +0200

On Tue 24-10-23 15:01:11, Christian Brauner wrote:
> The old method of implementing block device freeze and thaw operations
> required us to rely on get_active_super() to walk the list of all
> superblocks on the system to find any superblock that might use the
> block device. This is wasteful and not very pleasant overall.
> 
> Now that we can finally go straight from block device to owning
> superblock things become way simpler.
> 
> Link: https://lore.kernel.org/r/20230927-vfs-super-freeze-v1-3-ecc36d9ab4d9@xxxxxxxxxx
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>

Some comments below:

> diff --git a/block/bdev.c b/block/bdev.c
> index a3e2af580a73..9deacd346192 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -222,31 +222,24 @@ EXPORT_SYMBOL(sync_blockdev_range);
>   */
>  int bdev_freeze(struct block_device *bdev)
>  {
> -	struct super_block *sb;
>  	int error = 0;
>  
>  	mutex_lock(&bdev->bd_fsfreeze_mutex);
> -	if (++bdev->bd_fsfreeze_count > 1)
> -		goto done;
> -
> -	sb = get_active_super(bdev);
> -	if (!sb)
> -		goto sync;
> -	if (sb->s_op->freeze_super)
> -		error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> -	else
> -		error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> -	deactivate_super(sb);
>  
> -	if (error) {
> -		bdev->bd_fsfreeze_count--;
> -		goto done;
> +	if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
> +		mutex_unlock(&bdev->bd_fsfreeze_mutex);
> +		return 0;
> +	}
> +
> +	mutex_lock(&bdev->bd_holder_lock);
> +	if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
> +		error = bdev->bd_holder_ops->freeze(bdev);
> +		lockdep_assert_not_held(&bdev->bd_holder_lock);
> +	} else {
> +		mutex_unlock(&bdev->bd_holder_lock);
> +		error = sync_blockdev(bdev);
>  	}
> -	bdev->bd_fsfreeze_sb = sb;
>  
> -sync:
> -	error = sync_blockdev(bdev);
> -done:
>  	mutex_unlock(&bdev->bd_fsfreeze_mutex);
>  	return error;

It's somewhat strange that if we return error here, we still keep
bd_fsfreeze_count elevated. So I'd add here:

	if (error)
		atomic_dec(&bdev->bd_fsfreeze_count);

> @@ -262,29 +255,30 @@ EXPORT_SYMBOL(bdev_freeze);
>   */
>  int bdev_thaw(struct block_device *bdev)
>  {
> -	struct super_block *sb;
> -	int error = -EINVAL;
> +	int error = -EINVAL, nr_freeze;
>  
>  	mutex_lock(&bdev->bd_fsfreeze_mutex);
> -	if (!bdev->bd_fsfreeze_count)
> +
> +	/*
> +	 * If this returns < 0 it means that @bd_fsfreeze_count was
> +	 * already 0 and no decrement was performed.
> +	 */
> +	nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
> +	if (nr_freeze < 0)
>  		goto out;
>  
>  	error = 0;
> -	if (--bdev->bd_fsfreeze_count > 0)
> +	if (nr_freeze > 0)
>  		goto out;
>  
> -	sb = bdev->bd_fsfreeze_sb;
> -	if (!sb)
> -		goto out;
> +	mutex_lock(&bdev->bd_holder_lock);
> +	if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
> +		error = bdev->bd_holder_ops->thaw(bdev);
> +		lockdep_assert_not_held(&bdev->bd_holder_lock);
> +	} else {
> +		mutex_unlock(&bdev->bd_holder_lock);
> +	}
>  
> -	if (sb->s_op->thaw_super)
> -		error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> -	else
> -		error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> -	if (error)
> -		bdev->bd_fsfreeze_count++;
> -	else
> -		bdev->bd_fsfreeze_sb = NULL;

Here I'd also keep the error handling behavior and increment
bd_fsfreeze_count in case of error from ->thaw().

> diff --git a/fs/super.c b/fs/super.c
> index b224182f2440..ee0795ce09c7 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -1430,14 +1430,8 @@ struct super_block *sget_dev(struct fs_context *fc, dev_t dev)
>  EXPORT_SYMBOL(sget_dev);
>  
>  #ifdef CONFIG_BLOCK
> -/*
> - * Lock the superblock that is holder of the bdev. Returns the superblock
> - * pointer if we successfully locked the superblock and it is alive. Otherwise
> - * we return NULL and just unlock bdev->bd_holder_lock.
> - *
> - * The function must be called with bdev->bd_holder_lock and releases it.
> - */
> -static struct super_block *bdev_super_lock_shared(struct block_device *bdev)
> +
> +static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl)
>  	__releases(&bdev->bd_holder_lock)
>  {
>  	struct super_block *sb = bdev->bd_holder;
> @@ -1451,18 +1445,37 @@ static struct super_block *bdev_super_lock_shared(struct block_device *bdev)
>  	spin_lock(&sb_lock);
>  	sb->s_count++;
>  	spin_unlock(&sb_lock);
> +
>  	mutex_unlock(&bdev->bd_holder_lock);
>  
> -	locked = super_lock_shared(sb);
> -	if (!locked || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
> -		put_super(sb);
> +	locked = super_lock(sb, excl);
> +	put_super(sb);

Perhaps you can preserve the comment before put_super() here like:
	/*
	 * The superblock is not SB_DYING and we hold s_umount, we can drop
	 * our temporary reference now.
	 */

> +	if (!locked)
> +		return NULL;
> +
> +	return sb;
> +}
> +
> +/*
> + * Lock the superblock that is holder of the bdev. Returns the superblock
> + * pointer if we successfully locked the superblock and it is alive. Otherwise
> + * we return NULL and just unlock bdev->bd_holder_lock.
> + *
> + * The function must be called with bdev->bd_holder_lock and releases it.
> + */
> +static struct super_block *bdev_super_lock_shared(struct block_device *bdev)
> +{
> +	struct super_block *sb;
> +
> +	sb = bdev_super_lock(bdev, false);
> +	if (!sb)
> +		return NULL;
> +
> +	if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
> +		super_unlock_shared(sb);

Any reason why you didn't keep these checks in bdev_super_lock()? The use
in get_bdev_super() is actually limited to active superblocks anyway. And
then we can just get rid of bdev_super_lock_shared() and use
bdev_super_lock(bdev, false) instead.

> @@ -1495,9 +1508,76 @@ static void fs_bdev_sync(struct block_device *bdev)
>  	super_unlock_shared(sb);
>  }
>  
> +static struct super_block *get_bdev_super(struct block_device *bdev)
> +{
> +	bool active = false;
> +	struct super_block *sb;
> +
> +	sb = bdev_super_lock(bdev, true);
> +	if (sb) {
> +		active = atomic_inc_not_zero(&sb->s_active);
> +		super_unlock_excl(sb);
> +	}
> +	if (!active)
> +		return NULL;
> +	return sb;
> +}
> +
> +static int fs_bdev_freeze(struct block_device *bdev)
> +{
> +	struct super_block *sb;
> +	int error = 0;
> +
> +	lockdep_assert_held(&bdev->bd_fsfreeze_mutex);
> +
> +	if (WARN_ON_ONCE(unlikely(!bdev->bd_holder)))
> +		return -EINVAL;

This is going to break assumptions that bd_holder_lock is dropped by the
->freeze callback. Also the check seems a bit pointless to me given the
single callsite but whatever.

> +
> +	sb = get_bdev_super(bdev);
> +	if (!sb)
> +		return -EINVAL;
> +
> +	if (sb->s_op->freeze_super)
> +		error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> +	else
> +		error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> +	if (error)
> +		atomic_dec(&bdev->bd_fsfreeze_count);

This IMHO belongs into bdev_freeze().

> +	if (!error)
> +		error = sync_blockdev(bdev);
> +	deactivate_super(sb);
> +	return error;
> +}
> +
> +static int fs_bdev_thaw(struct block_device *bdev)
> +{
> +	struct super_block *sb;
> +	int error;
> +
> +	lockdep_assert_held(&bdev->bd_fsfreeze_mutex);
> +
> +	if (WARN_ON_ONCE(unlikely(!bdev->bd_holder)))
> +		return -EINVAL;

The same comment about bd_holder_lock applies here...

> +
> +	sb = get_bdev_super(bdev);
> +	if (WARN_ON_ONCE(!sb))
> +		return -EINVAL;
> +
> +	if (sb->s_op->thaw_super)
> +		error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> +	else
> +		error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> +	if (error)
> +		atomic_inc(&bdev->bd_fsfreeze_count);

And this error handling belongs into bdev_thaw().

> +	deactivate_super(sb);
> +	return error;
> +}
> +

								Honza
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR