[PATCH v5 76/78] md: Convert raid5-cache to XArray

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>

This is the first user of the radix tree I've converted which was
storing numbers rather than pointers.  I'm fairly pleased with how
well it came out.  There's less boiler-plate involved than there was
with the radix tree, so that's a win.  It does use the advanced API,
and I think that's a signal that there needs to be a separate API for
using the XArray for only integers.

Signed-off-by: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>
---
 drivers/md/raid5-cache.c | 119 ++++++++++++++++-------------------------------
 1 file changed, 40 insertions(+), 79 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 39f31f07ffe9..2c8ad0ed9b48 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -158,9 +158,8 @@ struct r5l_log {
 	/* to disable write back during in degraded mode */
 	struct work_struct disable_writeback_work;
 
-	/* to for chunk_aligned_read in writeback mode, details below */
-	spinlock_t tree_lock;
-	struct radix_tree_root big_stripe_tree;
+	/* for chunk_aligned_read in writeback mode, details below */
+	struct xarray big_stripe;
 };
 
 /*
@@ -170,9 +169,8 @@ struct r5l_log {
  * chunk contains 64 4kB-page, so this chunk contain 64 stripes). For
  * chunk_aligned_read, these stripes are grouped into one "big_stripe".
  * For each big_stripe, we count how many stripes of this big_stripe
- * are in the write back cache. These data are tracked in a radix tree
- * (big_stripe_tree). We use radix_tree item pointer as the counter.
- * r5c_tree_index() is used to calculate keys for the radix tree.
+ * are in the write back cache. This counter is tracked in an xarray
+ * (big_stripe). r5c_index() is used to calculate the index.
  *
  * chunk_aligned_read() calls r5c_big_stripe_cached() to look up
  * big_stripe of each chunk in the tree. If this big_stripe is in the
@@ -180,9 +178,9 @@ struct r5l_log {
  * rcu_read_lock().
  *
  * It is necessary to remember whether a stripe is counted in
- * big_stripe_tree. Instead of adding new flag, we reuses existing flags:
+ * big_stripe. Instead of adding new flag, we reuses existing flags:
  * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE. If either of these
- * two flags are set, the stripe is counted in big_stripe_tree. This
+ * two flags are set, the stripe is counted in big_stripe. This
  * requires moving set_bit(STRIPE_R5C_PARTIAL_STRIPE) to
  * r5c_try_caching_write(); and moving clear_bit of
  * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE to
@@ -190,23 +188,13 @@ struct r5l_log {
  */
 
 /*
- * radix tree requests lowest 2 bits of data pointer to be 2b'00.
- * So it is necessary to left shift the counter by 2 bits before using it
- * as data pointer of the tree.
- */
-#define R5C_RADIX_COUNT_SHIFT 2
-
-/*
- * calculate key for big_stripe_tree
+ * calculate key for big_stripe
  *
  * sect: align_bi->bi_iter.bi_sector or sh->sector
  */
-static inline sector_t r5c_tree_index(struct r5conf *conf,
-				      sector_t sect)
+static inline sector_t r5c_index(struct r5conf *conf, sector_t sect)
 {
-	sector_t offset;
-
-	offset = sector_div(sect, conf->chunk_sectors);
+	sector_div(sect, conf->chunk_sectors);
 	return sect;
 }
 
@@ -2646,10 +2634,6 @@ int r5c_try_caching_write(struct r5conf *conf,
 	int i;
 	struct r5dev *dev;
 	int to_cache = 0;
-	void **pslot;
-	sector_t tree_index;
-	int ret;
-	uintptr_t refcount;
 
 	BUG_ON(!r5c_is_writeback(log));
 
@@ -2697,39 +2681,29 @@ int r5c_try_caching_write(struct r5conf *conf,
 		}
 	}
 
-	/* if the stripe is not counted in big_stripe_tree, add it now */
+	/* if the stripe is not counted in big_stripe, add it now */
 	if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
 	    !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) {
-		tree_index = r5c_tree_index(conf, sh->sector);
-		spin_lock(&log->tree_lock);
-		pslot = radix_tree_lookup_slot(&log->big_stripe_tree,
-					       tree_index);
-		if (pslot) {
-			refcount = (uintptr_t)radix_tree_deref_slot_protected(
-				pslot, &log->tree_lock) >>
-				R5C_RADIX_COUNT_SHIFT;
-			radix_tree_replace_slot(
-				&log->big_stripe_tree, pslot,
-				(void *)((refcount + 1) << R5C_RADIX_COUNT_SHIFT));
-		} else {
-			/*
-			 * this radix_tree_insert can fail safely, so no
-			 * need to call radix_tree_preload()
-			 */
-			ret = radix_tree_insert(
-				&log->big_stripe_tree, tree_index,
-				(void *)(1 << R5C_RADIX_COUNT_SHIFT));
-			if (ret) {
-				spin_unlock(&log->tree_lock);
-				r5c_make_stripe_write_out(sh);
-				return -EAGAIN;
-			}
+		XA_STATE(xas, &log->big_stripe, r5c_index(conf, sh->sector));
+		void *entry;
+
+		/* Caller would rather handle failures than supply GFP flags */
+		xas_lock(&xas);
+		entry = xas_create(&xas);
+		if (entry)
+			entry = xa_mk_value(xa_to_value(entry) + 1);
+		else
+			entry = xa_mk_value(1);
+		xas_store(&xas, entry);
+		xas_unlock(&xas);
+		if (xas_error(&xas)) {
+			r5c_make_stripe_write_out(sh);
+			return -EAGAIN;
 		}
-		spin_unlock(&log->tree_lock);
 
 		/*
 		 * set STRIPE_R5C_PARTIAL_STRIPE, this shows the stripe is
-		 * counted in the radix tree
+		 * counted in big_stripe
 		 */
 		set_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state);
 		atomic_inc(&conf->r5c_cached_partial_stripes);
@@ -2812,9 +2786,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	struct r5l_log *log = conf->log;
 	int i;
 	int do_wakeup = 0;
-	sector_t tree_index;
-	void **pslot;
-	uintptr_t refcount;
 
 	if (!log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
 		return;
@@ -2852,24 +2823,21 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 	atomic_dec(&log->stripe_in_journal_count);
 	r5c_update_log_state(log);
 
-	/* stop counting this stripe in big_stripe_tree */
+	/* stop counting this stripe in big_stripe */
 	if (test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) ||
 	    test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) {
-		tree_index = r5c_tree_index(conf, sh->sector);
-		spin_lock(&log->tree_lock);
-		pslot = radix_tree_lookup_slot(&log->big_stripe_tree,
-					       tree_index);
-		BUG_ON(pslot == NULL);
-		refcount = (uintptr_t)radix_tree_deref_slot_protected(
-			pslot, &log->tree_lock) >>
-			R5C_RADIX_COUNT_SHIFT;
-		if (refcount == 1)
-			radix_tree_delete(&log->big_stripe_tree, tree_index);
+		XA_STATE(xas, &log->big_stripe, r5c_index(conf, sh->sector));
+		void *entry;
+
+		xas_lock(&xas);
+		entry = xas_load(&xas);
+		BUG_ON(!entry);
+		if (entry == xa_mk_value(1))
+			entry = NULL;
 		else
-			radix_tree_replace_slot(
-				&log->big_stripe_tree, pslot,
-				(void *)((refcount - 1) << R5C_RADIX_COUNT_SHIFT));
-		spin_unlock(&log->tree_lock);
+			entry = xa_mk_value(xa_to_value(entry) - 1);
+		xas_store(&xas, entry);
+		xas_unlock(&xas);
 	}
 
 	if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) {
@@ -2949,16 +2917,10 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
 bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
 {
 	struct r5l_log *log = conf->log;
-	sector_t tree_index;
-	void *slot;
 
 	if (!log)
 		return false;
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	tree_index = r5c_tree_index(conf, sect);
-	slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
-	return slot != NULL;
+	return xa_load(&log->big_stripe, r5c_index(conf, sect)) != NULL;
 }
 
 static int r5l_load_log(struct r5l_log *log)
@@ -3112,8 +3074,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	if (!log->meta_pool)
 		goto out_mempool;
 
-	spin_lock_init(&log->tree_lock);
-	INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
+	xa_init(&log->big_stripe);
 
 	log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
 						 log->rdev->mddev, "reclaim");
-- 
2.15.1

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux