As things stand now a filesystem frozen through the in-kernel bdev level API can be thawed using the userspace sb level API, which can lead to accidental corruption of filesystem snapshots and backups. To address this problem we modify the in-kernel API so that we can tell fsfreeze that a kernel initiated freeze is in progress and that the filesystem should not be thawed no matter how many times the FITHAW ioctl is invoked. Cc: linux-fsdevel@xxxxxxxxxxxxxxx Cc: Josef Bacik <jbacik@xxxxxxxxxxxx> Cc: Eric Sandeen <sandeen@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx> Cc: Dave Chinner <dchinner@xxxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Cc: Luiz Capitulino <lcapitulino@xxxxxxxxxx> Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx> --- diff -urNp linux-3.8-rc1-orig/fs/block_dev.c linux-3.8-rc1/fs/block_dev.c --- linux-3.8-rc1-orig/fs/block_dev.c 2012-12-25 16:22:48.268018000 +0900 +++ linux-3.8-rc1/fs/block_dev.c 2012-12-25 16:32:09.712018000 +0900 @@ -238,7 +238,7 @@ struct super_block *freeze_bdev(struct b sb = get_active_super(bdev); if (!sb) goto out; - error = freeze_super(sb); + error = __freeze_super(sb, true); if (error) { deactivate_super(sb); bdev->bd_fsfreeze_count--; @@ -265,6 +265,7 @@ int thaw_bdev(struct block_device *bdev, int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); + if (!bdev->bd_fsfreeze_count) goto out; @@ -273,20 +274,10 @@ int thaw_bdev(struct block_device *bdev, goto out; } - error = thaw_super(sb); - /* - * If the superblock is already unfrozen, i.e. thaw_super() returned - * -EINVAL, we consider the block device level thaw successful. This - * behavior is important in a scenario where a filesystem frozen using - * freeze_bdev() is thawed through the superblock level API; if we - * caused the subsequent thaw_bdev() to fail bdev->bd_fsfreeze_count - * would not go back to 0 which means that future calls to freeze_bdev() - * would not freeze the superblock, just increase the counter. 
- */ - if (error && error != -EINVAL) + error = __thaw_super(sb, true); + + if (error) bdev->bd_fsfreeze_count++; - else - error = 0; out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; diff -urNp linux-3.8-rc1-orig/fs/namespace.c linux-3.8-rc1/fs/namespace.c --- linux-3.8-rc1-orig/fs/namespace.c 2012-12-25 16:31:10.780018000 +0900 +++ linux-3.8-rc1/fs/namespace.c 2012-12-25 16:32:09.712018000 +0900 @@ -1103,6 +1103,11 @@ static void thaw_mount(struct mount *mnt * superblock succeeds (once it has been detached the fsfreeze * ioctls become unusable). Thus, force-thaw sb so that all tasks * in fsfreeze wait queue are woken up. + * + * thaw_super_force() does not actually thaw the sb if the freeze + * counter was locked (i.e. was frozen through the block device + * level API). In such a case the freeze counter is set to one + * thus guaranteeing that the sb will get thawed at unlock time. */ thaw_super_force(sb); /* Drops superblock lock. */ } diff -urNp linux-3.8-rc1-orig/fs/super.c linux-3.8-rc1/fs/super.c --- linux-3.8-rc1-orig/fs/super.c 2012-12-25 16:31:10.780018000 +0900 +++ linux-3.8-rc1/fs/super.c 2012-12-25 16:32:09.712018000 +0900 @@ -1301,15 +1301,20 @@ static void sb_wait_write(struct super_b } /** - * freeze_super - lock the filesystem and force it into a consistent state + * __freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock + * @lock: should we lock the freeze counter? * * Syncs the super to make sure the filesystem is consistent and calls the fs's - * freeze_fs. The reference counter (s_freeze_count) guarantees that only the - * last unfreeze process can unfreeze the frozen filesystem actually when - * multiple freeze requests arrive simultaneously. It counts up in - * freeze_super() and counts down in thaw_super(). When it becomes 0, - * thaw_super() will execute the unfreeze. + * freeze_fs. 
Freezes can nest, which has two implications: the filesystem level + * freeze occurs during the first nested freeze, and the actual filesystem thaw + * occurs only when the last thaw operation brings the freeze counter down to + * zero. + * + * If @lock is true the freeze counter is increased after a successful freeze + * but it cannot go back to zero (and the filesystem cannot actually be thawed) until + * the counter is unlocked using this function's thaw counterpart. The + * freeze counter lock does not nest. * * During this function, sb->s_writers.frozen goes through these values: * @@ -1334,15 +1339,24 @@ static void sb_wait_write(struct super_b * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is * mostly auxiliary for filesystems to verify they do not modify frozen fs. * - * sb->s_writers.frozen and sb->s_freeze_count are protected by sb->s_umount. + * sb->s_writers.frozen, sb->s_freeze_count and sb->s_freeze_locked are + * protected by sb->s_umount. */ -int freeze_super(struct super_block *sb) +int __freeze_super(struct super_block *sb, bool lock) { int ret = 0; + bool locked_old = sb->s_freeze_locked; atomic_inc(&sb->s_active); down_write(&sb->s_umount); + /* The freeze counter lock does not nest. */ + if (sb->s_freeze_locked && lock) { + ret = -EBUSY; + goto out_deactivate; + } + + sb->s_freeze_locked = lock ? true : sb->s_freeze_locked; if (++sb->s_freeze_count > 1) goto out_deactivate; @@ -1390,6 +1404,7 @@ int freeze_super(struct super_block *sb) if (ret) { printk(KERN_ERR "VFS:Filesystem freeze failed\n"); + sb->s_freeze_locked = locked_old; sb->s_freeze_count--; sb->s_writers.frozen = SB_UNFROZEN; smp_wmb(); @@ -1397,11 +1412,13 @@ int freeze_super(struct super_block *sb) goto out_deactivate; } } + /* * This is just for debugging purposes so that fs can warn if it * sees write activity when frozen is set to SB_FREEZE_COMPLETE. 
*/ sb->s_writers.frozen = SB_FREEZE_COMPLETE; + out_unlock: up_write(&sb->s_umount); return ret; @@ -1409,6 +1426,18 @@ out_deactivate: deactivate_locked_super(sb); return ret; } + +/** + * freeze_super - lock the filesystem and force it into a consistent state + * @sb: the super to lock + * + * This is a wrapper around __freeze_super() which does the actual work of + * freezing the filesystem. fsfreeze counter lock is not requested. + */ +int freeze_super(struct super_block *sb) +{ + return __freeze_super(sb, false); +} EXPORT_SYMBOL(freeze_super); /** @@ -1449,34 +1478,56 @@ out: } /** - * thaw_super - unlock filesystem + * __thaw_super - unlock filesystem * @sb: the super to thaw + * @unlock: should we unlock the freeze counter? * - * Unlocks the filesystem and marks it writeable again after freeze_super(). + * Tries to decrease the freeze counter and when it reaches zero unlocks the + * filesystem and marks it writeable again. If the counter is locked it cannot + * go back to zero (and thus trigger the actual filesystem thaw) unless @unlock + * is true. * * Returns -EINVAL if @sb is not frozen, 0 if it succeeded or the corresponding * error code otherwise. If the unfreeze fails, @sb is left in the frozen state. */ -int thaw_super(struct super_block *sb) +int __thaw_super(struct super_block *sb, bool unlock) { int error = 0; down_write(&sb->s_umount); - if (!sb->s_freeze_count) { + /* + * An unfrozen filesystem cannot be thawed. Similarly, an unlocked + * freeze counter cannot be unlocked. + */ + if (!sb->s_freeze_count || (!sb->s_freeze_locked && unlock)) { error = -EINVAL; goto out_unlock; } - if (--sb->s_freeze_count > 0) + /* + * Freezes nest so only the last call (freeze counter down to one) can + * trigger the actual filesystem thaw. + */ + if (sb->s_freeze_count > 1) { + sb->s_freeze_count--; + sb->s_freeze_locked = unlock ? false: sb->s_freeze_locked; + goto out_unlock; + } + /* A locked filesystem cannot be thawed unless unlock was requested. 
*/ + else if (sb->s_freeze_locked && !unlock) { + error = -EINVAL; goto out_unlock; + } error = raw_thaw_super(sb, false); - if (error) { - sb->s_freeze_count++; - goto out_unlock; + if (!error) { + sb->s_freeze_count = 0; + sb->s_freeze_locked = false; } + else + goto out_unlock; /* Active reference released after last thaw. */ deactivate_locked_super(sb); @@ -1486,6 +1537,19 @@ out_unlock: up_write(&sb->s_umount); return error; } + +/** + * thaw_super - unlock filesystem + * @sb: the super to unlock + * + * This is a wrapper around __thaw_super() which does the actual work of + * thawing the filesystem. Release of the fsfreeze counter lock is not + * requested. + */ +int thaw_super(struct super_block *sb) +{ + return __thaw_super(sb, false); +} EXPORT_SYMBOL(thaw_super); /** @@ -1505,10 +1569,19 @@ int thaw_super_force(struct super_block up_write(&sb->s_umount); return -EINVAL; } + + if (sb->s_freeze_locked) { + /* Ensure superblock gets thawed at unlock time */ + sb->s_freeze_count = 1; + up_write(&sb->s_umount); + return -EINVAL; + } + sb->s_freeze_count = 0; raw_thaw_super(sb, true); /* Active reference released after last thaw. */ deactivate_locked_super(sb); + return 0; } diff -urNp linux-3.8-rc1-orig/include/linux/fs.h linux-3.8-rc1/include/linux/fs.h --- linux-3.8-rc1-orig/include/linux/fs.h 2012-12-25 16:31:10.784018000 +0900 +++ linux-3.8-rc1/include/linux/fs.h 2012-12-25 16:32:09.712018000 +0900 @@ -1323,6 +1323,9 @@ struct super_block { /* Number of nested freezes */ int s_freeze_count; + + /* Is freeze state locked? 
*/ + bool s_freeze_locked; }; /* superblock cache pruning functions */ @@ -1881,7 +1884,9 @@ extern int vfs_statfs(struct path *, str extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); extern int vfs_ustat(dev_t, struct kstatfs *); +extern int __freeze_super(struct super_block *sb, bool lock); extern int freeze_super(struct super_block *super); +extern int __thaw_super(struct super_block *sb, bool unlock); extern int thaw_super(struct super_block *super); extern int thaw_super_force(struct super_block *super); extern void emergency_thaw_all(void); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html