[PATCH v6 89/99] btrfs: Convert buffer_radix to XArray

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>

Eliminate the buffer_lock as the internal xa_lock provides all the
necessary protection.  We can remove the radix_tree_preload calls, but
I can't find a good way to use the 'exists' result from xa_cmpxchg().
We could resort to the advanced API to improve this, but it's a really
unlikely case (nothing in the xarray when we first look; something there
when we try to add the newly-allocated extent buffer), so I think it's
not worth optimising for.

Signed-off-by: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>
---
 fs/btrfs/ctree.h             |  5 ++-
 fs/btrfs/disk-io.c           |  3 +-
 fs/btrfs/extent_io.c         | 82 ++++++++++++++++++--------------------------
 fs/btrfs/tests/btrfs-tests.c | 26 +++-----------
 4 files changed, 40 insertions(+), 76 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 272d099bed7e..87984ce3a4c2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1058,9 +1058,8 @@ struct btrfs_fs_info {
 	/* readahead works cnt */
 	atomic_t reada_works_cnt;
 
-	/* Extent buffer radix tree */
-	spinlock_t buffer_lock;
-	struct radix_tree_root buffer_radix;
+	/* Extent buffer array */
+	struct xarray buffer_array;
 
 	/* next backup root to be overwritten */
 	int backup_root_index;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1eae29045d43..650d1350b64d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2429,7 +2429,7 @@ int open_ctree(struct super_block *sb,
 	}
 
 	xa_init(&fs_info->fs_roots);
-	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+	xa_init(&fs_info->buffer_array);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2442,7 +2442,6 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->tree_mod_seq_lock);
 	spin_lock_init(&fs_info->super_lock);
 	spin_lock_init(&fs_info->qgroup_op_lock);
-	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->unused_bg_unpin_mutex);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd5e9d887328..2b43fa11c9e2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4884,8 +4884,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 	struct extent_buffer *eb;
 
 	rcu_read_lock();
-	eb = radix_tree_lookup(&fs_info->buffer_radix,
-			       start >> PAGE_SHIFT);
+	eb = xa_load(&fs_info->buffer_array, start >> PAGE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
 		/*
@@ -4919,31 +4918,24 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
 					u64 start)
 {
-	struct extent_buffer *eb, *exists = NULL;
-	int ret;
+	struct extent_buffer *exists, *eb = NULL;
 
-	eb = find_extent_buffer(fs_info, start);
-	if (eb)
-		return eb;
-	eb = alloc_dummy_extent_buffer(fs_info, start);
-	if (!eb)
-		return NULL;
-	eb->fs_info = fs_info;
 again:
-	ret = radix_tree_preload(GFP_NOFS);
-	if (ret)
+	exists = find_extent_buffer(fs_info, start);
+	if (exists)
 		goto free_eb;
-	spin_lock(&fs_info->buffer_lock);
-	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_SHIFT, eb);
-	spin_unlock(&fs_info->buffer_lock);
-	radix_tree_preload_end();
-	if (ret == -EEXIST) {
-		exists = find_extent_buffer(fs_info, start);
-		if (exists)
+	if (!eb)
+		eb = alloc_dummy_extent_buffer(fs_info, start);
+	if (!eb)
+		return NULL;
+	exists = xa_cmpxchg(&fs_info->buffer_array, start >> PAGE_SHIFT,
+				NULL, eb, GFP_NOFS);
+	if (unlikely(exists)) {
+		if (xa_is_err(exists)) {
+			exists = NULL;
 			goto free_eb;
-		else
-			goto again;
+		}
+		goto again;
 	}
 	check_buffer_tree_ref(eb);
 	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
@@ -4957,7 +4949,8 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
 	atomic_inc(&eb->refs);
 	return eb;
 free_eb:
-	btrfs_release_extent_buffer(eb);
+	if (eb)
+		btrfs_release_extent_buffer(eb);
 	return exists;
 }
 #endif
@@ -4969,22 +4962,24 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	unsigned long num_pages = num_extent_pages(start, len);
 	unsigned long i;
 	unsigned long index = start >> PAGE_SHIFT;
-	struct extent_buffer *eb;
+	struct extent_buffer *eb = NULL;
 	struct extent_buffer *exists = NULL;
 	struct page *p;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	int uptodate = 1;
-	int ret;
 
 	if (!IS_ALIGNED(start, fs_info->sectorsize)) {
 		btrfs_err(fs_info, "bad tree block start %llu", start);
 		return ERR_PTR(-EINVAL);
 	}
 
-	eb = find_extent_buffer(fs_info, start);
-	if (eb)
-		return eb;
+again:
+	exists = find_extent_buffer(fs_info, start);
+	if (exists)
+		goto free_eb;
 
+	if (eb)
+		goto add;
 	eb = __alloc_extent_buffer(fs_info, start, len);
 	if (!eb)
 		return ERR_PTR(-ENOMEM);
@@ -5037,24 +5032,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	}
 	if (uptodate)
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-again:
-	ret = radix_tree_preload(GFP_NOFS);
-	if (ret) {
-		exists = ERR_PTR(ret);
-		goto free_eb;
-	}
-
-	spin_lock(&fs_info->buffer_lock);
-	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_SHIFT, eb);
-	spin_unlock(&fs_info->buffer_lock);
-	radix_tree_preload_end();
-	if (ret == -EEXIST) {
-		exists = find_extent_buffer(fs_info, start);
-		if (exists)
+add:
+	exists = xa_cmpxchg(&fs_info->buffer_array, start >> PAGE_SHIFT,
+				NULL, eb, GFP_NOFS);
+	if (unlikely(exists)) {
+		if (xa_is_err(exists)) {
+			exists = NULL;
 			goto free_eb;
-		else
-			goto again;
+		}
+		goto again;
 	}
 	/* add one reference for the tree */
 	check_buffer_tree_ref(eb);
@@ -5107,10 +5093,8 @@ static int release_extent_buffer(struct extent_buffer *eb)
 
 			spin_unlock(&eb->refs_lock);
 
-			spin_lock(&fs_info->buffer_lock);
-			radix_tree_delete(&fs_info->buffer_radix,
+			xa_erase(&fs_info->buffer_array,
 					  eb->start >> PAGE_SHIFT);
-			spin_unlock(&fs_info->buffer_lock);
 		} else {
 			spin_unlock(&eb->refs_lock);
 		}
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 570bce31a301..f80fd54903e9 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -110,7 +110,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 		return NULL;
 	}
 
-	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->qgroup_lock);
 	spin_lock_init(&fs_info->qgroup_op_lock);
 	spin_lock_init(&fs_info->super_lock);
@@ -125,7 +124,7 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 	INIT_LIST_HEAD(&fs_info->dirty_qgroups);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
-	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+	xa_init(&fs_info->buffer_array);
 	xa_init(&fs_info->fs_roots);
 	extent_io_tree_init(&fs_info->freed_extents[0], NULL);
 	extent_io_tree_init(&fs_info->freed_extents[1], NULL);
@@ -139,8 +138,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 
 void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 {
-	struct radix_tree_iter iter;
-	void **slot;
+	struct extent_buffer *eb;
+	unsigned long index = 0;
 
 	if (!fs_info)
 		return;
@@ -151,25 +150,8 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 
 	test_mnt->mnt_sb->s_fs_info = NULL;
 
-	spin_lock(&fs_info->buffer_lock);
-	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
-		struct extent_buffer *eb;
-
-		eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
-		if (!eb)
-			continue;
-		/* Shouldn't happen but that kind of thinking creates CVE's */
-		if (radix_tree_exception(eb)) {
-			if (radix_tree_deref_retry(eb))
-				slot = radix_tree_iter_retry(&iter);
-			continue;
-		}
-		slot = radix_tree_iter_resume(slot, &iter);
-		spin_unlock(&fs_info->buffer_lock);
+	xa_for_each(&fs_info->buffer_array, eb, index, ULONG_MAX, XA_PRESENT)
 		free_extent_buffer_stale(eb);
-		spin_lock(&fs_info->buffer_lock);
-	}
-	spin_unlock(&fs_info->buffer_lock);
 
 	btrfs_free_qgroup_config(fs_info);
 	btrfs_free_fs_roots(fs_info);
-- 
2.15.1

--
To unsubscribe from this list: send the line "unsubscribe cgroups" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]     [Monitors]

  Powered by Linux