The emergency thaw process uses iterate_super() which holds the sb->s_umount lock in read mode. The current thaw_super() code takes the sb->s_umount lock in write mode, hence leading to an instant deadlock. Pass the emergency state into the thaw_bdev/thaw_super code to avoid taking the s_umount lock in this case. We are running under the bdev freeze mutex, so this is still serialised against freeze despite only having a read lock on the sb->s_umount. Hence it should be safe to execute in this manner, especially given that emergency thaw is a rarely executed "get-out-of-jail" feature. Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx> --- diff -urNp linux-3.5-rc4-orig/fs/block_dev.c linux-3.5-rc4/fs/block_dev.c --- linux-3.5-rc4-orig/fs/block_dev.c 2012-06-29 14:03:40.732008807 +0900 +++ linux-3.5-rc4/fs/block_dev.c 2012-06-29 14:20:29.480002824 +0900 @@ -305,13 +305,14 @@ struct super_block *freeze_bdev(struct b EXPORT_SYMBOL(freeze_bdev); /** - * thaw_bdev -- unlock filesystem + * __thaw_bdev -- unlock filesystem * @bdev: blockdevice to unlock * @sb: associated superblock + * @emergency: emergency thaw * * Unlocks the filesystem and marks it writeable again after freeze_bdev(). */ -int thaw_bdev(struct block_device *bdev, struct super_block *sb) +static int __thaw_bdev(struct block_device *bdev, struct super_block *sb, int emergency) { int error = -EINVAL; @@ -327,15 +328,35 @@ int thaw_bdev(struct block_device *bdev, if (!sb) goto out; - error = thaw_super(sb); + if (emergency) + error = thaw_super_emergency(sb); + else + error = thaw_super(sb); if (error) bdev->bd_fsfreeze_count++; out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } + +/** + * thaw_bdev -- unlock filesystem + * @bdev: blockdevice to unlock + * @sb: associated superblock + * + * Unlocks the filesystem and marks it writeable again after freeze_bdev(). + */ +int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return __thaw_bdev(bdev, sb, 0); +} EXPORT_SYMBOL(thaw_bdev); +int thaw_bdev_emergency(struct block_device *bdev, struct super_block *sb) +{ + return __thaw_bdev(bdev, sb, 1); +} + static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, blkdev_get_block, wbc); diff -urNp linux-3.5-rc4-orig/fs/buffer.c linux-3.5-rc4/fs/buffer.c --- linux-3.5-rc4-orig/fs/buffer.c 2012-06-27 12:00:47.762616000 +0900 +++ linux-3.5-rc4/fs/buffer.c 2012-06-29 14:21:41.948002825 +0900 @@ -514,7 +514,7 @@ repeat: static void do_thaw_one(struct super_block *sb, void *unused) { char b[BDEVNAME_SIZE]; - while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + while (sb->s_bdev && !thaw_bdev_emergency(sb->s_bdev, sb)) printk(KERN_WARNING "Emergency Thaw on %s\n", bdevname(sb->s_bdev, b)); } diff -urNp linux-3.5-rc4-orig/fs/super.c linux-3.5-rc4/fs/super.c --- linux-3.5-rc4-orig/fs/super.c 2012-05-21 07:29:13.000000000 +0900 +++ linux-3.5-rc4/fs/super.c 2012-06-29 14:49:57.480003483 +0900 @@ -1221,19 +1221,24 @@ int freeze_super(struct super_block *sb) EXPORT_SYMBOL(freeze_super); /** - * thaw_super -- unlock filesystem + * __thaw_super -- unlock filesystem * @sb: the super to thaw + * @emergency: emergency thaw * * Unlocks the filesystem and marks it writeable again after freeze_super(). + * If we are doing an emergency thaw, we don't need to grab the sb->s_umount + * lock as it is already held. */ -int thaw_super(struct super_block *sb) +static int __thaw_super(struct super_block *sb, int emergency) { - int error; + int error = 0; + + if (!emergency) + down_write(&sb->s_umount); - down_write(&sb->s_umount); if (sb->s_frozen == SB_UNFROZEN) { - up_write(&sb->s_umount); - return -EINVAL; + error = -EINVAL; + goto out_unlock; } if (sb->s_flags & MS_RDONLY) @@ -1245,8 +1250,7 @@ int thaw_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem thaw failed\n"); sb->s_frozen = SB_FREEZE_TRANS; - up_write(&sb->s_umount); - return error; + goto out_unlock; } } @@ -1254,8 +1258,44 @@ out: sb->s_frozen = SB_UNFROZEN; smp_wmb(); wake_up(&sb->s_wait_unfrozen); - deactivate_locked_super(sb); + + /* + * When called from emergency scope, we cannot grab the s_umount lock + * so we cannot deactivate the superblock. This may leave unbalanced + * superblock references which could prevent unmount, but given this is + * an emergency operation.... + */ + if (!emergency) + deactivate_locked_super(sb); return 0; + +out_unlock: + if (!emergency) + up_write(&sb->s_umount); + return error; +} + +/** + * thaw_super -- unlock filesystem + * @sb: the super to thaw + * + * Unlocks the filesystem and marks it writeable again after freeze_super(). + */ +int thaw_super(struct super_block *sb) +{ + return __thaw_super(sb, 0); } EXPORT_SYMBOL(thaw_super); + +/** + * thaw_super_emergency -- unlock filesystem + * @sb: the super to thaw + * + * Unlocks the filesystem and marks it writeable again after freeze_super(). + * This avoids taking the s_umount lock if it is already held. + */ +int thaw_super_emergency(struct super_block *sb) +{ + return __thaw_super(sb, 1); +} diff -urNp linux-3.5-rc4-orig/include/linux/fs.h linux-3.5-rc4/include/linux/fs.h --- linux-3.5-rc4-orig/include/linux/fs.h 2012-06-27 12:00:48.894616001 +0900 +++ linux-3.5-rc4/include/linux/fs.h 2012-06-29 14:43:51.096010758 +0900 @@ -1941,6 +1941,7 @@ extern int fd_statfs(int, struct kstatfs extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); +extern int thaw_super_emergency(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); extern int current_umask(void); @@ -2096,6 +2097,8 @@ extern void kill_bdev(struct block_devic extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); +extern int thaw_bdev_emergency(struct block_device *bdev, + struct super_block *sb); extern int fsync_bdev(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} @@ -2112,6 +2115,12 @@ static inline int thaw_bdev(struct block { return 0; } + +static inline int thaw_bdev_emergency(struct block_device *bdev, + struct super_block *sb) +{ + return 0; +} #endif extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html