[PATCH 2/8] fsfreeze: emergency thaw will deadlock on s_umount

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The emergency thaw process uses iterate_super() which holds the
sb->s_umount lock in read mode. The current thaw_super() code takes
the sb->s_umount lock in write mode, hence leading to an instant
deadlock.

Pass the emergency state into the thaw_bdev/thaw_super code to avoid
taking the s_umount lock in this case. We are running under the bdev
freeze mutex, so this is still serialised against freeze despite
only having a read lock on the sb->s_umount. Hence it should be safe
to execute in this manner, especially given that emergency thaw is a
rarely executed "get-out-of-jail" feature.

Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Cc: Jan Kara <jack@xxxxxxx>
Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx>
---

diff -urNp vfs-orig/fs/block_dev.c vfs/fs/block_dev.c
--- vfs-orig/fs/block_dev.c	2012-07-12 13:26:42.108635927 +0900
+++ vfs/fs/block_dev.c	2012-07-12 13:26:56.964628082 +0900
@@ -305,13 +305,14 @@ struct super_block *freeze_bdev(struct b
 EXPORT_SYMBOL(freeze_bdev);
 
 /**
- * thaw_bdev  -- unlock filesystem
+ * __thaw_bdev  -- unlock filesystem
  * @bdev:	blockdevice to unlock
  * @sb:		associated superblock
+ * @emergency:	emergency thaw
  *
  * Unlocks the filesystem and marks it writeable again after freeze_bdev().
  */
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+static int __thaw_bdev(struct block_device *bdev, struct super_block *sb, int emergency)
 {
 	int error = -EINVAL;
 
@@ -324,15 +325,35 @@ int thaw_bdev(struct block_device *bdev,
 		goto out;
 	}
 
-	error = thaw_super(sb);
+	if (emergency)
+		error = thaw_super_emergency(sb);
+	else
+		error = thaw_super(sb);
 	if (error)
 		bdev->bd_fsfreeze_count++;
 out:
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return error;
 }
+
+/**
+ * thaw_bdev  -- unlock filesystem
+ * @bdev:	blockdevice to unlock
+ * @sb:		associated superblock
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_bdev().
+ */
+int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+	return __thaw_bdev(bdev, sb, 0);
+}
 EXPORT_SYMBOL(thaw_bdev);
 
+int thaw_bdev_emergency(struct block_device *bdev, struct super_block *sb)
+{
+	return __thaw_bdev(bdev, sb, 1);
+}
+
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, blkdev_get_block, wbc);
diff -urNp vfs-orig/fs/buffer.c vfs/fs/buffer.c
--- vfs-orig/fs/buffer.c	2012-07-04 18:57:54.000000000 +0900
+++ vfs/fs/buffer.c	2012-07-12 13:26:56.964628082 +0900
@@ -514,7 +514,7 @@ repeat:
 static void do_thaw_one(struct super_block *sb, void *unused)
 {
 	char b[BDEVNAME_SIZE];
-	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+	while (sb->s_bdev && !thaw_bdev_emergency(sb->s_bdev, sb))
 		printk(KERN_WARNING "Emergency Thaw on %s\n",
 		       bdevname(sb->s_bdev, b));
 }
diff -urNp vfs-orig/fs/super.c vfs/fs/super.c
--- vfs-orig/fs/super.c	2012-07-04 18:57:54.000000000 +0900
+++ vfs/fs/super.c	2012-07-12 14:21:09.736566768 +0900
@@ -1221,23 +1221,25 @@ int freeze_super(struct super_block *sb)
 EXPORT_SYMBOL(freeze_super);
 
 /**
- * thaw_super -- unlock filesystem
+ * __thaw_super -- unlock filesystem
  * @sb: the super to thaw
+ * @emergency:	emergency thaw
  *
  * Unlocks the filesystem and marks it writeable again after freeze_super().
+ * This is the unlocked version of thaw_super and has to be called with the
+ * sb->s_umount lock held in the non-emergency thaw case.
  */
-int thaw_super(struct super_block *sb)
+static int __thaw_super(struct super_block *sb, int emergency)
 {
-	int error;
+	int error = 0;
 
-	down_write(&sb->s_umount);
 	if (sb->s_frozen == SB_UNFROZEN) {
-		up_write(&sb->s_umount);
-		return -EINVAL;
+		error = -EINVAL;
+		goto out;
 	}
 
 	if (sb->s_flags & MS_RDONLY)
-		goto out;
+		goto out_thaw;
 
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
@@ -1245,17 +1247,52 @@ int thaw_super(struct super_block *sb)
 			printk(KERN_ERR
 				"VFS:Filesystem thaw failed\n");
 			sb->s_frozen = SB_FREEZE_TRANS;
-			up_write(&sb->s_umount);
-			return error;
+			goto out;
 		}
 	}
 
-out:
+out_thaw:
 	sb->s_frozen = SB_UNFROZEN;
 	smp_wmb();
 	wake_up(&sb->s_wait_unfrozen);
-	deactivate_locked_super(sb);
 
-	return 0;
+	/*
+	 * When called from emergency scope, we cannot grab the s_umount lock
+	 * so we cannot deactivate the superblock. This may leave unbalanced
+	 * superblock references which could prevent unmount, but given this is
+	 * an emergency operation....
+	 */
+	if (!emergency)
+		deactivate_locked_super(sb);
+out:
+	return error;
+}
+
+/**
+ * thaw_super -- unlock filesystem
+ * @sb: the super to thaw
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super().
+ */
+int thaw_super(struct super_block *sb)
+{
+	int res;
+	down_write(&sb->s_umount);
+	res = __thaw_super(sb, 0);
+	up_write(&sb->s_umount);
+	return res;
 }
 EXPORT_SYMBOL(thaw_super);
+
+/**
+ * thaw_super_emergency  -- unlock filesystem
+ * @sb: the super to thaw
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super().
+ * The kernel gets here holding the sb->s_umount lock in read mode so we cannot
+ * grab it again in write mode.
+ */
+int thaw_super_emergency(struct super_block *sb)
+{
+	return __thaw_super(sb, 1);
+}
diff -urNp vfs-orig/include/linux/fs.h vfs/include/linux/fs.h
--- vfs-orig/include/linux/fs.h	2012-07-04 18:57:54.000000000 +0900
+++ vfs/include/linux/fs.h	2012-07-12 13:26:56.968627781 +0900
@@ -1944,6 +1944,7 @@ extern int fd_statfs(int, struct kstatfs
 extern int vfs_ustat(dev_t, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
+extern int thaw_super_emergency(struct super_block *super);
 extern bool our_mnt(struct vfsmount *mnt);
 
 extern int current_umask(void);
@@ -2107,6 +2108,8 @@ extern void kill_bdev(struct block_devic
 extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
+extern int thaw_bdev_emergency(struct block_device *bdev,
+			       struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
@@ -2123,6 +2126,12 @@ static inline int thaw_bdev(struct block
 {
 	return 0;
 }
+
+static inline int thaw_bdev_emergency(struct block_device *bdev,
+				      struct super_block *sb)
+{
+	return 0;
+}
 #endif
 extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux