[PATCH 3/7] bdev: implement freeze and thaw holder operations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The old method of implementing block device freeze and thaw operations
required us to rely on get_active_super() to walk the list of all
superblocks on the system to find any superblock that might use the
block device. This is wasteful and not very pleasant overall.

Now that we can finally go straight from block device to owning
superblock things become way simpler. In fact, we can even get rid of
bd_fsfreeze_mutex and rely on the holder lock for freezing.

We let fs_bdev_thaw() grab its own active reference so we can hold
bd_holder_lock across freeze and thaw without risk of deadlocks. That in
turn allows us to get rid of bd_fsfreeze_mutex. This means
fs_bdev_thaw() might put the last reference to the superblock instead of
thaw_super(). This shouldn't be an issue. If it turns out to be one we
can reshuffle the code to simply hold s_umount when thaw_super() returns.

Thanks to Jan and Christoph who pointed out that bd_fsfreeze_mutex is
unnecessary now.

Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
---
 block/bdev.c              | 74 +++++++++++++++++--------------------
 fs/super.c                | 94 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/blk_types.h |  2 +-
 3 files changed, 119 insertions(+), 51 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 0d27db3e69e7..3deccd0ffcf2 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -222,32 +222,23 @@ EXPORT_SYMBOL(sync_blockdev_range);
  */
 int bdev_freeze(struct block_device *bdev)
 {
-	struct super_block *sb;
 	int error = 0;
 
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (++bdev->bd_fsfreeze_count > 1)
-		goto done;
+	mutex_lock(&bdev->bd_holder_lock);
 
-	sb = get_active_super(bdev);
-	if (!sb)
-		goto sync;
-	if (sb->s_op->freeze_super)
-		error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
-	else
-		error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
-	deactivate_super(sb);
+	if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
+		mutex_unlock(&bdev->bd_holder_lock);
+		return 0;
+	}
 
-	if (error) {
-		bdev->bd_fsfreeze_count--;
-		goto done;
+	if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
+		error = bdev->bd_holder_ops->freeze(bdev);
+		lockdep_assert_not_held(&bdev->bd_holder_lock);
+	} else {
+		sync_blockdev(bdev);
+		mutex_unlock(&bdev->bd_holder_lock);
 	}
-	bdev->bd_fsfreeze_sb = sb;
 
-sync:
-	sync_blockdev(bdev);
-done:
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return error;
 }
 EXPORT_SYMBOL(bdev_freeze);
@@ -262,31 +253,32 @@ EXPORT_SYMBOL(bdev_freeze);
  */
 int bdev_thaw(struct block_device *bdev)
 {
-	struct super_block *sb;
-	int error = -EINVAL;
+	int error = 0, nr_freeze;
 
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (!bdev->bd_fsfreeze_count)
-		goto out;
+	mutex_lock(&bdev->bd_holder_lock);
 
-	error = 0;
-	if (--bdev->bd_fsfreeze_count > 0)
-		goto out;
+	/*
+	 * If this returns < 0 it means that @bd_fsfreeze_count was
+	 * already 0 and no decrement was performed.
+	 */
+	nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
+	if (nr_freeze < 0) {
+		mutex_unlock(&bdev->bd_holder_lock);
+		return -EINVAL;
+	}
 
-	sb = bdev->bd_fsfreeze_sb;
-	if (!sb)
-		goto out;
+	if (nr_freeze > 0) {
+		mutex_unlock(&bdev->bd_holder_lock);
+		return 0;
+	}
+
+	if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
+		error = bdev->bd_holder_ops->thaw(bdev);
+		lockdep_assert_not_held(&bdev->bd_holder_lock);
+	} else {
+		mutex_unlock(&bdev->bd_holder_lock);
+	}
 
-	if (sb->s_op->thaw_super)
-		error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
-	else
-		error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
-	if (error)
-		bdev->bd_fsfreeze_count++;
-	else
-		bdev->bd_fsfreeze_sb = NULL;
-out:
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return error;
 }
 EXPORT_SYMBOL(bdev_thaw);
diff --git a/fs/super.c b/fs/super.c
index e54866345dc7..672f1837fbef 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1469,9 +1469,91 @@ static void fs_bdev_sync(struct block_device *bdev)
 	super_unlock_shared(sb);
 }
 
+static struct super_block *get_bdev_super(const struct block_device *bdev)
+{
+	struct super_block *sb_bdev = bdev->bd_holder, *sb = NULL;
+
+	if (!sb_bdev)
+		return NULL;
+	if (super_lock_excl(sb_bdev) && atomic_inc_not_zero(&sb_bdev->s_active))
+		sb = sb_bdev;
+	super_unlock_excl(sb_bdev);
+	return sb;
+}
+
+static int fs_bdev_freeze(struct block_device *bdev)
+	__releases(&bdev->bd_holder_lock)
+{
+	struct super_block *sb;
+	int error = 0;
+
+	lockdep_assert_held(&bdev->bd_holder_lock);
+
+	sb = get_bdev_super(bdev);
+	if (sb) {
+		if (sb->s_op->freeze_super)
+			error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+		else
+			error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+		if (error)
+			atomic_dec(&bdev->bd_fsfreeze_count);
+	}
+
+	/*
+	 * We have grabbed an active reference which means that the
+	 * superblock and the block device cannot go away. But we might
+	 * end up holding the last reference and so end up shutting the
+	 * superblock down. So drop @bdev->bd_holder_lock to avoid
+	 * deadlocks with blkdev_put().
+	 */
+	mutex_unlock(&bdev->bd_holder_lock);
+
+	if (sb)
+		deactivate_super(sb);
+
+	if (!error)
+		sync_blockdev(bdev);
+
+	return error;
+}
+
+static int fs_bdev_thaw(struct block_device *bdev)
+	__releases(&bdev->bd_holder_lock)
+{
+	struct super_block *sb;
+	int error;
+
+	lockdep_assert_held(&bdev->bd_holder_lock);
+
+	sb = get_bdev_super(bdev);
+	if (WARN_ON_ONCE(!sb))
+		return -EINVAL;
+
+	if (sb->s_op->thaw_super)
+		error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+	else
+		error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+	if (error)
+		atomic_inc(&bdev->bd_fsfreeze_count);
+
+	/*
+	 * We have grabbed an active reference which means that the
+	 * superblock and the block device cannot go away. But we might
+	 * end up holding the last reference and so end up shutting the
+	 * superblock down. So drop @bdev->bd_holder_lock to avoid
+	 * deadlocks with blkdev_put().
+	 */
+	mutex_unlock(&bdev->bd_holder_lock);
+	deactivate_super(sb);
+
+	return error;
+}
+
 const struct blk_holder_ops fs_holder_ops = {
 	.mark_dead		= fs_bdev_mark_dead,
 	.sync			= fs_bdev_sync,
+	.freeze			= fs_bdev_freeze,
+	.thaw			= fs_bdev_thaw,
 };
 EXPORT_SYMBOL_GPL(fs_holder_ops);
 
@@ -1499,15 +1581,10 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
 	}
 
 	/*
-	 * Until SB_BORN flag is set, there can be no active superblock
-	 * references and thus no filesystem freezing. get_active_super() will
-	 * just loop waiting for SB_BORN so even bdev_freeze() cannot proceed.
-	 *
-	 * It is enough to check bdev was not frozen before we set s_bdev.
+	 * It is enough to check bdev was not frozen before we set
+	 * s_bdev as freezing will wait until SB_BORN is set.
 	 */
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+	if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
 		if (fc)
 			warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
 		blkdev_put(bdev, sb);
@@ -1519,7 +1596,6 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
 	if (bdev_stable_writes(bdev))
 		sb->s_iflags |= SB_I_STABLE_WRITES;
 	spin_unlock(&sb_lock);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 
 	snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
 	shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d5c5e59ddbd2..88e1848b0869 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -57,7 +57,7 @@ struct block_device {
 	const struct blk_holder_ops *bd_holder_ops;
 	struct mutex		bd_holder_lock;
 	/* The counter of freeze processes */
-	int			bd_fsfreeze_count;
+	atomic_t		bd_fsfreeze_count;
 	int			bd_holders;
 	struct kobject		*bd_holder_dir;
 

-- 
2.34.1




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux