Re: [PATCH 2/4] Btrfs: fix deadlock on sb->s_umount when doing umount

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Apr 26, 2012 at 10:58:04AM +0800, Miao Xie wrote:
> The reason the deadlock is that:
>   Task					Btrfs-cleaner
>   umount()
>     down_write(&s->s_umount)
>     sync_filesystem()
> 					do auto-defragment and produce
> 					lots of dirty pages
>     close_ctree()
>       wait for the end of
>       btrfs-cleaner

why it's needed to wait for cleaner during close_ctree? I got bitten
yesterday by (a non-deadlock) scenario, when there were tons of deleted
snapshots, filesystem almost full, so getting and managing free space
was slow (btrfs-cache thread was more active than btrfs-cleaner), and
umount just did not end. interruptible by reboot only.

avoiding this particular case of waiting for cleaner:

--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)

        mutex_lock(&root->fs_info->cleaner_mutex);
        btrfs_run_delayed_iputs(root);
-       btrfs_clean_old_snapshots(root);
+       if (!btrfs_fs_closing(root->fs_info)) {
+               /* btrfs_clean_old_snapshots(root); */
+               wake_up_process(root->fs_info->cleaner_kthread);
+               printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
+       } else {
+               printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
+       }
        mutex_unlock(&root->fs_info->cleaner_mutex);

        /* wait until ongoing cleanup work done */


plus not even trying to call the cleaner directly, rather waking the cleaner
thread. (attached whole work-in-progress patch).

david
---

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 58a232d..0651f6f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1617,13 +1617,14 @@ static int cleaner_kthread(void *arg)
 	struct btrfs_root *root = arg;
 
 	do {
+		int again = 0;
 		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
 
 		if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
 			down_read_trylock(&root->fs_info->sb->s_umount) &&
 		    mutex_trylock(&root->fs_info->cleaner_mutex)) {
 			btrfs_run_delayed_iputs(root);
-			btrfs_clean_old_snapshots(root);
+			again = btrfs_clean_one_old_snapshot(root);
 			mutex_unlock(&root->fs_info->cleaner_mutex);
 			btrfs_run_defrag_inodes(root->fs_info);
 			up_read(&root->fs_info->sb->s_umount);
@@ -1631,7 +1632,7 @@ static int cleaner_kthread(void *arg)
 
 		if (freezing(current)) {
 			refrigerator();
-		} else {
+		} else if (!again) {//FIXME: check again now directly from dead_roots?
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!kthread_should_stop())
 				schedule();
@@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)
 
 	mutex_lock(&root->fs_info->cleaner_mutex);
 	btrfs_run_delayed_iputs(root);
-	btrfs_clean_old_snapshots(root);
+	if (!btrfs_fs_closing(root->fs_info)) {
+		/* btrfs_clean_old_snapshots(root); */
+		wake_up_process(root->fs_info->cleaner_kthread);
+		printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
+	} else {
+		printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
+	}
 	mutex_unlock(&root->fs_info->cleaner_mutex);
 
 	/* wait until ongoing cleanup work done */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ec1e0c6..3aba911 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6995,6 +6995,11 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
 	while (1) {
+		if (btrfs_fs_closing(root->fs_info)) {
+			printk(KERN_DEBUG "btrfs: drop early exit\n");
+			err = -EAGAIN;
+			goto out_end_trans;
+		}
 		ret = walk_down_tree(trans, root, path, wc);
 		if (ret < 0) {
 			err = ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 922e6ec..c9dc857 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4060,6 +4060,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 	while (1) {
 		mutex_lock(&fs_info->cleaner_mutex);
 
+		// FIXME: wake cleaner
 		btrfs_clean_old_snapshots(fs_info->tree_root);
 		ret = relocate_block_group(rc);
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index de2942f..3d83f6b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -783,7 +783,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 int btrfs_add_dead_root(struct btrfs_root *root)
 {
 	spin_lock(&root->fs_info->trans_lock);
-	list_add(&root->root_list, &root->fs_info->dead_roots);
+	list_add_tail(&root->root_list, &root->fs_info->dead_roots);
 	spin_unlock(&root->fs_info->trans_lock);
 	return 0;
 }
@@ -1533,7 +1533,45 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 			ret = btrfs_drop_snapshot(root, NULL, 0, 0);
 		else
 			ret =btrfs_drop_snapshot(root, NULL, 1, 0);
-		BUG_ON(ret < 0);
+		BUG_ON(ret < 0 && ret != -EAGAIN);
 	}
 	return 0;
 }
+/*
+ * return < 0 if error
+ * 0 if there are no more dead_roots at the time of call
+ * 1 there are more to be processed, call me again
+ *
+ * (racy)
+ */
+int btrfs_clean_one_old_snapshot(struct btrfs_root *root)
+{
+	int ret;
+	int run_again = 1;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	if (root->fs_info->sb->s_flags & MS_RDONLY) {
+		printk(KERN_WARNING "btrfs: cleaner called for RO fs!\n");
+	}
+
+	spin_lock(&fs_info->trans_lock);
+	root = list_first_entry(&fs_info->dead_roots,
+			struct btrfs_root, root_list);
+	list_del(&root->root_list);
+	if (list_empty(&fs_info->dead_roots))
+		run_again = 0;
+	spin_unlock(&fs_info->trans_lock);
+
+	printk(KERN_DEBUG "btrfs: cleaner removing %llu\n",
+			(unsigned long long)root->objectid);
+
+	btrfs_kill_all_delayed_nodes(root);
+
+	if (btrfs_header_backref_rev(root->node) <
+			BTRFS_MIXED_BACKREF_REV)
+		ret = btrfs_drop_snapshot(root, NULL, 0, 0);
+	else
+		ret = btrfs_drop_snapshot(root, NULL, 1, 0);
+	BUG_ON(ret < 0 && ret != -EAGAIN);
+	return run_again || ret == -EAGAIN;
+}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index fe27379..7071ca5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -94,6 +94,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 int btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
+int btrfs_clean_one_old_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root);
 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
---
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux