Make access to the per-CPU page cache lock-free. There are now extra places
where the cache is accessed, so making it lock-less removes any lock
contention. For example, both the shrinker path and the reclaim kthread can
touch a remote CPU's page cache from the current CPU, which would otherwise
require taking its lock. The "rcuscale" performance test suite shows a
slight improvement:

../kvm.sh --memory 16G --torture rcuscale --allcpus --duration 10 \
	--kconfig CONFIG_NR_CPUS=64 --bootargs "rcuscale.kfree_rcu_test=1 \
	rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 \
	rcuscale.kfree_rcu_test_double=1 torture.disable_onoff_at_boot" --trust-make

Total time taken by all kfree'ers over 100 iterations, in ns:

default: AVG: 10968415107.5  MIN: 10668412500  MAX: 11312145160
patch:   AVG: 10787596486.1  MIN: 10397559880  MAX: 11214901050

Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
---
 kernel/rcu/tree.c | 91 +++++++++++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 35 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9c8cfb01e9a6..4f04664d5ac0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3167,8 +3167,9 @@ struct kfree_rcu_cpu {
 	atomic_t work_in_progress;
 	struct hrtimer hrtimer;
 
+	// lock-free cache.
 	struct llist_head bkvcache;
-	int nr_bkv_objs;
+	atomic_t nr_bkv_objs;
 };
 
 static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
@@ -3215,49 +3216,79 @@ krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)
 	raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
+/*
+ * Increment 'v' if 'v' is below 'thresh'. Returns true on success,
+ * false if 'v' + 1 would exceed 'thresh'.
+ *
+ * Decrement 'v' if 'v' is above 'thresh'. Returns true on success,
+ * false if 'v' - 1 would drop below 'thresh'.
+ */
+static inline bool
+atomic_test_inc_dec(atomic_t *v, unsigned int thresh, bool inc)
+{
+	unsigned int cur = atomic_read(v);
+	unsigned int old;
+
+	for (;;) {
+		if (inc) {
+			if (cur >= thresh)
+				return false;
+		} else {
+			if (cur <= thresh)
+				return false;
+		}
+
+		old = atomic_cmpxchg(v, cur, inc ? (cur + 1) : (cur - 1));
+		if (old == cur)
+			break;
+
+		cur = old;
+	}
+
+	return true;
+}
+
 static inline struct kvfree_rcu_bulk_data *
 get_cached_bnode(struct kfree_rcu_cpu *krcp)
 {
-	if (!krcp->nr_bkv_objs)
-		return NULL;
+	struct kvfree_rcu_bulk_data *bnode = NULL;
 
-	krcp->nr_bkv_objs--;
-	return (struct kvfree_rcu_bulk_data *)
-		llist_del_first(&krcp->bkvcache);
+	if (atomic_test_inc_dec(&krcp->nr_bkv_objs, 0, false))
+		bnode = (struct kvfree_rcu_bulk_data *)
+			llist_del_first(&krcp->bkvcache);
+
+	return bnode;
 }
 
 static inline bool
 put_cached_bnode(struct kfree_rcu_cpu *krcp,
 	struct kvfree_rcu_bulk_data *bnode)
 {
-	// Check the limit.
-	if (krcp->nr_bkv_objs >= rcu_min_cached_objs)
-		return false;
-
-	llist_add((struct llist_node *) bnode, &krcp->bkvcache);
-	krcp->nr_bkv_objs++;
-	return true;
+	if (atomic_test_inc_dec(&krcp->nr_bkv_objs, rcu_min_cached_objs, true)) {
+		llist_add((struct llist_node *) bnode, &krcp->bkvcache);
+		return true;
+	}
+
+	return false;
 }
 
 static int
 drain_page_cache(struct kfree_rcu_cpu *krcp)
 {
-	unsigned long flags;
-	struct llist_node *page_list, *pos, *n;
-	int freed = 0;
+	struct kvfree_rcu_bulk_data *bnode;
+	int num_pages, i;
 
-	raw_spin_lock_irqsave(&krcp->lock, flags);
-	page_list = llist_del_all(&krcp->bkvcache);
-	krcp->nr_bkv_objs = 0;
-	raw_spin_unlock_irqrestore(&krcp->lock, flags);
+	num_pages = atomic_read(&krcp->nr_bkv_objs);
+
+	for (i = 0; i < num_pages; i++) {
+		bnode = get_cached_bnode(krcp);
+		if (!bnode)
+			break;
 
-	llist_for_each_safe(pos, n, page_list) {
-		free_page((unsigned long)pos);
-		freed++;
+		free_page((unsigned long) bnode);
 	}
 
-	return freed;
+	return i;
 }
 
 /*
@@ -3314,10 +3345,8 @@ static void kfree_rcu_work(struct work_struct *work)
 		}
 		rcu_lock_release(&rcu_callback_map);
 
-		raw_spin_lock_irqsave(&krcp->lock, flags);
 		if (put_cached_bnode(krcp, bkvhead[i]))
 			bkvhead[i] = NULL;
-		raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
 		if (bkvhead[i])
 			free_page((unsigned long) bkvhead[i]);
@@ -3460,7 +3489,6 @@ static void fill_page_cache_func(struct work_struct *work)
 	struct kfree_rcu_cpu *krcp =
 		container_of(work, struct kfree_rcu_cpu,
 			page_cache_work.work);
-	unsigned long flags;
 	bool pushed;
 	int i;
 
@@ -3469,10 +3497,7 @@ static void fill_page_cache_func(struct work_struct *work)
 			__get_free_page(GFP_KERNEL | __GFP_NORETRY |
 				__GFP_NOMEMALLOC | __GFP_NOWARN);
 
 		if (bnode) {
-			raw_spin_lock_irqsave(&krcp->lock, flags);
 			pushed = put_cached_bnode(krcp, bnode);
-			raw_spin_unlock_irqrestore(&krcp->lock, flags);
-
 			if (!pushed) {
 				free_page((unsigned long) bnode);
 				break;
@@ -3647,17 +3672,13 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
 	int cpu;
 	unsigned long count = 0;
-	unsigned long flags;
 
 	/* Snapshot count of all CPUs */
 	for_each_possible_cpu(cpu) {
 		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
 		count += READ_ONCE(krcp->count);
-
-		raw_spin_lock_irqsave(&krcp->lock, flags);
-		count += krcp->nr_bkv_objs;
-		raw_spin_unlock_irqrestore(&krcp->lock, flags);
+		count += atomic_read(&krcp->nr_bkv_objs);
 	}
 
 	WRITE_ONCE(backoff_page_cache_fill, true);
-- 
2.20.1
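
Not part of the patch: below is a minimal userspace sketch of the same
"bounded compare-and-swap counter" idea the patch relies on, written with
C11 atomics instead of the kernel's atomic_t API. The point it illustrates
is why the threshold check and the counter update have to be a single
atomic step: with a plain atomic increment the cache could overshoot
rcu_min_cached_objs under concurrency. The names bounded_inc/bounded_dec
are made up for illustration only.

/*
 * Standalone illustration, assuming a C11-capable compiler.
 * bounded_inc()/bounded_dec() are hypothetical names, not kernel APIs.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Increment *v only while *v < limit; mirrors the "inc" path. */
static bool bounded_inc(atomic_uint *v, unsigned int limit)
{
	unsigned int cur = atomic_load(v);

	while (cur < limit) {
		/*
		 * On failure, compare_exchange_weak reloads the current
		 * value into 'cur' and the loop re-checks the bound.
		 */
		if (atomic_compare_exchange_weak(v, &cur, cur + 1))
			return true;
	}
	return false;
}

/* Decrement *v only while *v > floor; mirrors the "dec" path. */
static bool bounded_dec(atomic_uint *v, unsigned int floor)
{
	unsigned int cur = atomic_load(v);

	while (cur > floor) {
		if (atomic_compare_exchange_weak(v, &cur, cur - 1))
			return true;
	}
	return false;
}

int main(void)
{
	atomic_uint nr_objs = 0;
	unsigned int limit = 5, drained = 0;

	/* Fill up to the limit, as a page-cache "put" would. */
	while (bounded_inc(&nr_objs, limit))
		;

	/* Drain back down to zero, as a "get"/drain would. */
	while (bounded_dec(&nr_objs, 0))
		drained++;

	printf("filled to %u, drained %u\n", limit, drained);
	return 0;
}

In the patch the successful counter transition is what grants the right to
touch the llist (llist_del_first()/llist_add()), so the list itself never
needs the krcp->lock for cache operations.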