From: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx> This is the first user of the radix tree I've converted which was storing numbers rather than pointers. I'm fairly pleased with how well it came out. There's less boiler-plate involved than there was with the radix tree, so that's a win. It does use the advanced API, and I think that's a signal that there needs to be a separate API for using the XArray for only integers. Signed-off-by: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx> --- drivers/md/raid5-cache.c | 119 ++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 79 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 39f31f07ffe9..2c8ad0ed9b48 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -158,9 +158,8 @@ struct r5l_log { /* to disable write back during in degraded mode */ struct work_struct disable_writeback_work; - /* to for chunk_aligned_read in writeback mode, details below */ - spinlock_t tree_lock; - struct radix_tree_root big_stripe_tree; + /* for chunk_aligned_read in writeback mode, details below */ + struct xarray big_stripe; }; /* @@ -170,9 +169,8 @@ struct r5l_log { * chunk contains 64 4kB-page, so this chunk contain 64 stripes). For * chunk_aligned_read, these stripes are grouped into one "big_stripe". * For each big_stripe, we count how many stripes of this big_stripe - * are in the write back cache. These data are tracked in a radix tree - * (big_stripe_tree). We use radix_tree item pointer as the counter. - * r5c_tree_index() is used to calculate keys for the radix tree. + * are in the write back cache. This counter is tracked in an xarray + * (big_stripe). r5c_index() is used to calculate the index. * * chunk_aligned_read() calls r5c_big_stripe_cached() to look up * big_stripe of each chunk in the tree. If this big_stripe is in the @@ -180,9 +178,9 @@ struct r5l_log { * rcu_read_lock(). * * It is necessary to remember whether a stripe is counted in - * big_stripe_tree. Instead of adding new flag, we reuses existing flags: + * big_stripe. Instead of adding new flag, we reuses existing flags: * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE. If either of these - * two flags are set, the stripe is counted in big_stripe_tree. This + * two flags are set, the stripe is counted in big_stripe. This * requires moving set_bit(STRIPE_R5C_PARTIAL_STRIPE) to * r5c_try_caching_write(); and moving clear_bit of * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE to @@ -190,23 +188,13 @@ struct r5l_log { */ /* - * radix tree requests lowest 2 bits of data pointer to be 2b'00. - * So it is necessary to left shift the counter by 2 bits before using it - * as data pointer of the tree. - */ -#define R5C_RADIX_COUNT_SHIFT 2 - -/* - * calculate key for big_stripe_tree + * calculate key for big_stripe * * sect: align_bi->bi_iter.bi_sector or sh->sector */ -static inline sector_t r5c_tree_index(struct r5conf *conf, - sector_t sect) +static inline sector_t r5c_index(struct r5conf *conf, sector_t sect) { - sector_t offset; - - offset = sector_div(sect, conf->chunk_sectors); + sector_div(sect, conf->chunk_sectors); return sect; } @@ -2646,10 +2634,6 @@ int r5c_try_caching_write(struct r5conf *conf, int i; struct r5dev *dev; int to_cache = 0; - void **pslot; - sector_t tree_index; - int ret; - uintptr_t refcount; BUG_ON(!r5c_is_writeback(log)); @@ -2697,39 +2681,29 @@ int r5c_try_caching_write(struct r5conf *conf, } } - /* if the stripe is not counted in big_stripe_tree, add it now */ + /* if the stripe is not counted in big_stripe, add it now */ if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) { - tree_index = r5c_tree_index(conf, sh->sector); - spin_lock(&log->tree_lock); - pslot = radix_tree_lookup_slot(&log->big_stripe_tree, - tree_index); - if (pslot) { - refcount = (uintptr_t)radix_tree_deref_slot_protected( - pslot, &log->tree_lock) >> - R5C_RADIX_COUNT_SHIFT; - radix_tree_replace_slot( - &log->big_stripe_tree, pslot, - (void *)((refcount + 1) << R5C_RADIX_COUNT_SHIFT)); - } else { - /* - * this radix_tree_insert can fail safely, so no - * need to call radix_tree_preload() - */ - ret = radix_tree_insert( - &log->big_stripe_tree, tree_index, - (void *)(1 << R5C_RADIX_COUNT_SHIFT)); - if (ret) { - spin_unlock(&log->tree_lock); - r5c_make_stripe_write_out(sh); - return -EAGAIN; - } + XA_STATE(xas, &log->big_stripe, r5c_index(conf, sh->sector)); + void *entry; + + /* Caller would rather handle failures than supply GFP flags */ + xas_lock(&xas); + entry = xas_create(&xas); + if (entry) + entry = xa_mk_value(xa_to_value(entry) + 1); + else + entry = xa_mk_value(1); + xas_store(&xas, entry); + xas_unlock(&xas); + if (xas_error(&xas)) { + r5c_make_stripe_write_out(sh); + return -EAGAIN; } - spin_unlock(&log->tree_lock); /* * set STRIPE_R5C_PARTIAL_STRIPE, this shows the stripe is - * counted in the radix tree + * counted in big_stripe */ set_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state); atomic_inc(&conf->r5c_cached_partial_stripes); @@ -2812,9 +2786,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf, struct r5l_log *log = conf->log; int i; int do_wakeup = 0; - sector_t tree_index; - void **pslot; - uintptr_t refcount; if (!log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)) return; @@ -2852,24 +2823,21 @@ void r5c_finish_stripe_write_out(struct r5conf *conf, atomic_dec(&log->stripe_in_journal_count); r5c_update_log_state(log); - /* stop counting this stripe in big_stripe_tree */ + /* stop counting this stripe in big_stripe */ if (test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) || test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) { - tree_index = r5c_tree_index(conf, sh->sector); - spin_lock(&log->tree_lock); - pslot = radix_tree_lookup_slot(&log->big_stripe_tree, - tree_index); - BUG_ON(pslot == NULL); - refcount = (uintptr_t)radix_tree_deref_slot_protected( - pslot, &log->tree_lock) >> - R5C_RADIX_COUNT_SHIFT; - if (refcount == 1) - radix_tree_delete(&log->big_stripe_tree, tree_index); + XA_STATE(xas, &log->big_stripe, r5c_index(conf, sh->sector)); + void *entry; + + xas_lock(&xas); + entry = xas_load(&xas); + BUG_ON(!entry); + if (entry == xa_mk_value(1)) + entry = NULL; else - radix_tree_replace_slot( - &log->big_stripe_tree, pslot, - (void *)((refcount - 1) << R5C_RADIX_COUNT_SHIFT)); - spin_unlock(&log->tree_lock); + entry = xa_mk_value(xa_to_value(entry) - 1); + xas_store(&xas, entry); + xas_unlock(&xas); } if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) { @@ -2949,16 +2917,10 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh) bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect) { struct r5l_log *log = conf->log; - sector_t tree_index; - void *slot; if (!log) return false; - - WARN_ON_ONCE(!rcu_read_lock_held()); - tree_index = r5c_tree_index(conf, sect); - slot = radix_tree_lookup(&log->big_stripe_tree, tree_index); - return slot != NULL; + return xa_load(&log->big_stripe, r5c_index(conf, sect)) != NULL; } static int r5l_load_log(struct r5l_log *log) @@ -3112,8 +3074,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) if (!log->meta_pool) goto out_mempool; - spin_lock_init(&log->tree_lock); - INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN); + xa_init(&log->big_stripe); log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); -- 2.15.1