>> The reason the deadlock is that: >> Task Btrfs-cleaner >> umount() >> down_write(&s->s_umount) >> sync_filesystem() >> do auto-defragment and produce >> lots of dirty pages >> close_ctree() >> wait for the end of >> btrfs-cleaner > > why it's needed to wait for cleaner during close_ctree? I got bitten > yesterday by (a non-deadlock) scenario, when there were tons of deleted > snapshots, filesystem almost full, so getting and managing free space > was slow (btrfs-cache thread was more active than btrfs-cleaner), and > umount just did not end. interruptible by reboot only. > > avoiding this particular case of waiting for cleaner: Your patch doesn't fix the problem. Task Btrfs-cleaner umount() down_write(&s->s_umount) sync_filesystem() do auto-defragment for some files and produce lots of dirty pages. do auto-defragment for the left files start_transaction reserve space shrink_delalloc() writeback_inodes_sb_nr_if_idle() down_read(&sb->s_umount) close_ctree() stop_kthread() wait for the end of btrfs-cleaner the deadlock still happens. But I found you add a trylock for ->s_umount in cleaner_kthread(), this method can fix the deadlock problem, I think. It may be introduced by the other patch, could you send that patch to me? I found if we fail to trylock ->cleaner_mutex, we will forget to unlock ->s_umount. Thanks Miao > > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root) > > mutex_lock(&root->fs_info->cleaner_mutex); > btrfs_run_delayed_iputs(root); > - btrfs_clean_old_snapshots(root); > + if (!btrfs_fs_closing(root->fs_info)) { > + /* btrfs_clean_old_snapshots(root); */ > + wake_up_process(root->fs_info->cleaner_kthread); > + printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n"); > + } else { > + printk(KERN_DEBUG "btrfs: skip cleaning when going down\n"); > + } > mutex_unlock(&root->fs_info->cleaner_mutex); > > /* wait until ongoing cleanup work done */ > > > plus not even trying to call the cleaner directly, rather waking the cleaner > thread. (attached whole work-in-progress patch). > > david > --- > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 58a232d..0651f6f 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -1617,13 +1617,14 @@ static int cleaner_kthread(void *arg) > struct btrfs_root *root = arg; > > do { > + int again = 0; > vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); > > if (!(root->fs_info->sb->s_flags & MS_RDONLY) && > down_read_trylock(&root->fs_info->sb->s_umount) && > mutex_trylock(&root->fs_info->cleaner_mutex)) { > btrfs_run_delayed_iputs(root); > - btrfs_clean_old_snapshots(root); > + again = btrfs_clean_one_old_snapshot(root); > mutex_unlock(&root->fs_info->cleaner_mutex); > btrfs_run_defrag_inodes(root->fs_info); > up_read(&root->fs_info->sb->s_umount); > @@ -1631,7 +1632,7 @@ static int cleaner_kthread(void *arg) > > if (freezing(current)) { > refrigerator(); > - } else { > + } else if (!again) {//FIXME: check again now directly from dead_roots? > set_current_state(TASK_INTERRUPTIBLE); > if (!kthread_should_stop()) > schedule(); > @@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root) > > mutex_lock(&root->fs_info->cleaner_mutex); > btrfs_run_delayed_iputs(root); > - btrfs_clean_old_snapshots(root); > + if (!btrfs_fs_closing(root->fs_info)) { > + /* btrfs_clean_old_snapshots(root); */ > + wake_up_process(root->fs_info->cleaner_kthread); > + printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n"); > + } else { > + printk(KERN_DEBUG "btrfs: skip cleaning when going down\n"); > + } > mutex_unlock(&root->fs_info->cleaner_mutex); > > /* wait until ongoing cleanup work done */ > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index ec1e0c6..3aba911 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -6995,6 +6995,11 @@ int btrfs_drop_snapshot(struct btrfs_root *root, > wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); > > while (1) { > + if (btrfs_fs_closing(root->fs_info)) { > + printk(KERN_DEBUG "btrfs: drop early exit\n"); > + err = -EAGAIN; > + goto out_end_trans; > + } > ret = walk_down_tree(trans, root, path, wc); > if (ret < 0) { > err = ret; > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c > index 922e6ec..c9dc857 100644 > --- a/fs/btrfs/relocation.c > +++ b/fs/btrfs/relocation.c > @@ -4060,6 +4060,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) > while (1) { > mutex_lock(&fs_info->cleaner_mutex); > > + // FIXME: wake cleaner > btrfs_clean_old_snapshots(fs_info->tree_root); > ret = relocate_block_group(rc); > > diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c > index de2942f..3d83f6b 100644 > --- a/fs/btrfs/transaction.c > +++ b/fs/btrfs/transaction.c > @@ -783,7 +783,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, > int btrfs_add_dead_root(struct btrfs_root *root) > { > spin_lock(&root->fs_info->trans_lock); > - list_add(&root->root_list, &root->fs_info->dead_roots); > + list_add_tail(&root->root_list, &root->fs_info->dead_roots); > spin_unlock(&root->fs_info->trans_lock); > return 0; > } > @@ -1533,7 +1533,45 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) > ret = btrfs_drop_snapshot(root, NULL, 0, 0); > else > ret =btrfs_drop_snapshot(root, NULL, 1, 0); > - BUG_ON(ret < 0); > + BUG_ON(ret < 0 && ret != -EAGAIN); > } > return 0; > } > +/* > + * return < 0 if error > + * 0 if there are no more dead_roots at the time of call > + * 1 there are more to be processed, call me again > + * > + * (racy) > + */ > +int btrfs_clean_one_old_snapshot(struct btrfs_root *root) > +{ > + int ret; > + int run_again = 1; > + struct btrfs_fs_info *fs_info = root->fs_info; > + > + if (root->fs_info->sb->s_flags & MS_RDONLY) { > + printk(KERN_WARNING "btrfs: cleaner called for RO fs!\n"); > + } > + > + spin_lock(&fs_info->trans_lock); > + root = list_first_entry(&fs_info->dead_roots, > + struct btrfs_root, root_list); > + list_del(&root->root_list); > + if (list_empty(&fs_info->dead_roots)) > + run_again = 0; > + spin_unlock(&fs_info->trans_lock); > + > + printk(KERN_DEBUG "btrfs: cleaner removing %llu\n", > + (unsigned long long)root->objectid); > + > + btrfs_kill_all_delayed_nodes(root); > + > + if (btrfs_header_backref_rev(root->node) < > + BTRFS_MIXED_BACKREF_REV) > + ret = btrfs_drop_snapshot(root, NULL, 0, 0); > + else > + ret = btrfs_drop_snapshot(root, NULL, 1, 0); > + BUG_ON(ret < 0 && ret != -EAGAIN); > + return run_again || ret == -EAGAIN; > +} > diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h > index fe27379..7071ca5 100644 > --- a/fs/btrfs/transaction.h > +++ b/fs/btrfs/transaction.h > @@ -94,6 +94,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, > int btrfs_add_dead_root(struct btrfs_root *root); > int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); > int btrfs_clean_old_snapshots(struct btrfs_root *root); > +int btrfs_clean_one_old_snapshot(struct btrfs_root *root); > int btrfs_commit_transaction(struct btrfs_trans_handle *trans, > struct btrfs_root *root); > int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, > --- > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html