The emergency thaw process uses iterate_super() which holds the sb->s_umount lock in read mode. The current thaw_super() code takes the sb->s_umount lock in write mode, hence leading to an instant deadlock. Pass the emergency state into the thaw_bdev/thaw_super code to avoid taking the s_umount lock in this case. We are running under the bdev freeze mutex, so this is still serialised against freeze despite only having a read lock on the sb->s_umount. Hence it should be safe to execute in this manner, especially given that emergency thaw is a rarely executed "get-out-of-jail" feature. Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx> --- diff -urNp vfs-orig/fs/block_dev.c vfs/fs/block_dev.c --- vfs-orig/fs/block_dev.c 2012-07-12 13:26:42.108635927 +0900 +++ vfs/fs/block_dev.c 2012-07-12 13:26:56.964628082 +0900 @@ -305,13 +305,14 @@ struct super_block *freeze_bdev(struct b EXPORT_SYMBOL(freeze_bdev); /** - * thaw_bdev -- unlock filesystem + * __thaw_bdev -- unlock filesystem * @bdev: blockdevice to unlock * @sb: associated superblock + * @emergency: emergency thaw * * Unlocks the filesystem and marks it writeable again after freeze_bdev(). */ -int thaw_bdev(struct block_device *bdev, struct super_block *sb) +static int __thaw_bdev(struct block_device *bdev, struct super_block *sb, int emergency) { int error = -EINVAL; @@ -324,15 +325,35 @@ int thaw_bdev(struct block_device *bdev, goto out; } - error = thaw_super(sb); + if (emergency) + error = thaw_super_emergency(sb); + else + error = thaw_super(sb); if (error) bdev->bd_fsfreeze_count++; out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } + +/** + * thaw_bdev -- unlock filesystem + * @bdev: blockdevice to unlock + * @sb: associated superblock + * + * Unlocks the filesystem and marks it writeable again after freeze_bdev(). + */ +int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return __thaw_bdev(bdev, sb, 0); +} EXPORT_SYMBOL(thaw_bdev); +int thaw_bdev_emergency(struct block_device *bdev, struct super_block *sb) +{ + return __thaw_bdev(bdev, sb, 1); +} + static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, blkdev_get_block, wbc); diff -urNp vfs-orig/fs/buffer.c vfs/fs/buffer.c --- vfs-orig/fs/buffer.c 2012-07-04 18:57:54.000000000 +0900 +++ vfs/fs/buffer.c 2012-07-12 13:26:56.964628082 +0900 @@ -514,7 +514,7 @@ repeat: static void do_thaw_one(struct super_block *sb, void *unused) { char b[BDEVNAME_SIZE]; - while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + while (sb->s_bdev && !thaw_bdev_emergency(sb->s_bdev, sb)) printk(KERN_WARNING "Emergency Thaw on %s\n", bdevname(sb->s_bdev, b)); } diff -urNp vfs-orig/fs/super.c vfs/fs/super.c --- vfs-orig/fs/super.c 2012-07-04 18:57:54.000000000 +0900 +++ vfs/fs/super.c 2012-07-12 14:21:09.736566768 +0900 @@ -1221,23 +1221,25 @@ int freeze_super(struct super_block *sb) EXPORT_SYMBOL(freeze_super); /** - * thaw_super -- unlock filesystem + * __thaw_super -- unlock filesystem * @sb: the super to thaw + * @emergency: emergency thaw * * Unlocks the filesystem and marks it writeable again after freeze_super(). + * This is the unlocked version of thaw_super and has to be called with the + * sb->s_umount lock held in the non-emergency thaw case. */ -int thaw_super(struct super_block *sb) +static int __thaw_super(struct super_block *sb, int emergency) { - int error; + int error = 0; - down_write(&sb->s_umount); if (sb->s_frozen == SB_UNFROZEN) { - up_write(&sb->s_umount); - return -EINVAL; + error = -EINVAL; + goto out; } if (sb->s_flags & MS_RDONLY) - goto out; + goto out_thaw; if (sb->s_op->unfreeze_fs) { error = sb->s_op->unfreeze_fs(sb); @@ -1245,17 +1247,52 @@ int thaw_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem thaw failed\n"); sb->s_frozen = SB_FREEZE_TRANS; - up_write(&sb->s_umount); - return error; + goto out; } } -out: +out_thaw: sb->s_frozen = SB_UNFROZEN; smp_wmb(); wake_up(&sb->s_wait_unfrozen); - deactivate_locked_super(sb); - return 0; + /* + * When called from emergency scope, we cannot grab the s_umount lock + * so we cannot deactivate the superblock. This may leave unbalanced + * superblock references which could prevent unmount, but given this is + * an emergency operation.... + */ + if (!emergency) + deactivate_locked_super(sb); +out: + return error; +} + +/** + * thaw_super -- unlock filesystem + * @sb: the super to thaw + * + * Unlocks the filesystem and marks it writeable again after freeze_super(). + */ +int thaw_super(struct super_block *sb) +{ + int res; + down_write(&sb->s_umount); + res = __thaw_super(sb, 0); + up_write(&sb->s_umount); + return res; } EXPORT_SYMBOL(thaw_super); + +/** + * thaw_super_emergency -- unlock filesystem + * @sb: the super to thaw + * + * Unlocks the filesystem and marks it writeable again after freeze_super(). + * The kernel gets here holding the sb->s_umount lock in read mode so we cannot + * grab it again in write mode. + */ +int thaw_super_emergency(struct super_block *sb) +{ + return __thaw_super(sb, 1); +} diff -urNp vfs-orig/include/linux/fs.h vfs/include/linux/fs.h --- vfs-orig/include/linux/fs.h 2012-07-04 18:57:54.000000000 +0900 +++ vfs/include/linux/fs.h 2012-07-12 13:26:56.968627781 +0900 @@ -1944,6 +1944,7 @@ extern int fd_statfs(int, struct kstatfs extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); +extern int thaw_super_emergency(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); extern int current_umask(void); @@ -2107,6 +2108,8 @@ extern void kill_bdev(struct block_devic extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); +extern int thaw_bdev_emergency(struct block_device *bdev, + struct super_block *sb); extern int fsync_bdev(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} @@ -2123,6 +2126,12 @@ static inline int thaw_bdev(struct block { return 0; } + +static inline int thaw_bdev_emergency(struct block_device *bdev, + struct super_block *sb) +{ + return 0; +} #endif extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html