On Fri, Mar 12, 2021 at 01:38:14PM +0100, Uladzislau Rezki wrote:
> Hello, Paul.
> 
> Please do not consider this patch. It is buggy :)

Consider it formally withdrawn, and thank you for letting me know!

							Thanx, Paul

> --
> Vlad Rezki
> 
> On Mon, Mar 8, 2021 at 8:50 PM Uladzislau Rezki (Sony) <urezki@xxxxxxxxx> wrote:
> 
> > Implement an access to the page cache as lock-free variant. This
> > is done because there are extra places where an access is required,
> > therefore making it lock-less will remove any lock contention.
> >
> > For example we have a shrinker path as well as a reclaim kthread.
> > In both cases a current CPU can access to a remote per-cpu page
> > cache that would require taking a lock to protect it.
> >
> > An "rcuscale" performance test suite can detect it and shows some
> > slight improvements:
> >
> > ../kvm.sh --memory 16G --torture rcuscale --allcpus --duration 10 \
> > --kconfig CONFIG_NR_CPUS=64 --bootargs "rcuscale.kfree_rcu_test=1 \
> > rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 \
> > rcuscale.kfree_rcu_test_double=1 torture.disable_onoff_at_boot" --trust-make
> >
> > 100 iterations, checking total time taken by all kfree'ers in ns.:
> >
> > default: AVG: 10968415107.5 MIN: 10668412500 MAX: 11312145160
> > patch:   AVG: 10787596486.1 MIN: 10397559880 MAX: 11214901050
> >
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
> > ---
> >  kernel/rcu/tree.c | 91 +++++++++++++++++++++++++++++------------------
> >  1 file changed, 56 insertions(+), 35 deletions(-)
> >
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 9c8cfb01e9a6..4f04664d5ac0 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -3167,8 +3167,9 @@ struct kfree_rcu_cpu {
> >  	atomic_t work_in_progress;
> >  	struct hrtimer hrtimer;
> >  
> > +	// lock-free cache.
> >  	struct llist_head bkvcache;
> > -	int nr_bkv_objs;
> > +	atomic_t nr_bkv_objs;
> >  };
> >  
> >  static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
> > @@ -3215,49 +3216,79 @@ krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)
> >  	raw_spin_unlock_irqrestore(&krcp->lock, flags);
> >  }
> >  
> > +/*
> > + * Increment 'v', if 'v' is below 'thresh'. Returns true if we
> > + * succeeded, false if 'v' + 1 would be bigger than 'thresh'.
> > + *
> > + * Decrement 'v' if 'v' is upper 'thresh'. Returns true if we
> > + * succeeded, false if 'v' - 1 would be smaller than 'thresh'.
> > + */
> > +static inline bool
> > +atomic_test_inc_dec(atomic_t *v, unsigned int thresh, bool inc)
> > +{
> > +	unsigned int cur = atomic_read(v);
> > +	unsigned int old;
> > +
> > +	for (;;) {
> > +		if (inc) {
> > +			if (cur >= thresh)
> > +				return false;
> > +		} else {
> > +			if (cur <= thresh)
> > +				return false;
> > +		}
> > +
> > +		old = atomic_cmpxchg(v, cur, inc ? (cur + 1):(cur - 1));
> > +		if (old == cur)
> > +			break;
> > +
> > +		cur = old;
> > +	}
> > +
> > +	return true;
> > +}
> > +
> >  static inline struct kvfree_rcu_bulk_data *
> >  get_cached_bnode(struct kfree_rcu_cpu *krcp)
> >  {
> > -	if (!krcp->nr_bkv_objs)
> > -		return NULL;
> > +	struct kvfree_rcu_bulk_data *bnode = NULL;
> >  
> > -	krcp->nr_bkv_objs--;
> > -	return (struct kvfree_rcu_bulk_data *)
> > -		llist_del_first(&krcp->bkvcache);
> > +	if (atomic_test_inc_dec(&krcp->nr_bkv_objs, 0, false))
> > +		bnode = (struct kvfree_rcu_bulk_data *)
> > +			llist_del_first(&krcp->bkvcache);
> > +
> > +	return bnode;
> >  }
> >  
> >  static inline bool
> >  put_cached_bnode(struct kfree_rcu_cpu *krcp,
> >  	struct kvfree_rcu_bulk_data *bnode)
> >  {
> > -	// Check the limit.
> > -	if (krcp->nr_bkv_objs >= rcu_min_cached_objs)
> > -		return false;
> > -
> > -	llist_add((struct llist_node *) bnode, &krcp->bkvcache);
> > -	krcp->nr_bkv_objs++;
> > -	return true;
> > +	if (atomic_test_inc_dec(&krcp->nr_bkv_objs, rcu_min_cached_objs, true)) {
> > +		llist_add((struct llist_node *) bnode, &krcp->bkvcache);
> > +		return true;
> > +	}
> >  
> > +	return false;
> >  }
> >  
> >  static int
> >  drain_page_cache(struct kfree_rcu_cpu *krcp)
> >  {
> > -	unsigned long flags;
> > -	struct llist_node *page_list, *pos, *n;
> > -	int freed = 0;
> > +	struct kvfree_rcu_bulk_data *bnode;
> > +	int num_pages, i;
> >  
> > -	raw_spin_lock_irqsave(&krcp->lock, flags);
> > -	page_list = llist_del_all(&krcp->bkvcache);
> > -	krcp->nr_bkv_objs = 0;
> > -	raw_spin_unlock_irqrestore(&krcp->lock, flags);
> > +	num_pages = atomic_read(&krcp->nr_bkv_objs);
> > +
> > +	for (i = 0; i < num_pages; i++) {
> > +		bnode = get_cached_bnode(krcp);
> > +		if (!bnode)
> > +			break;
> >  
> > -	llist_for_each_safe(pos, n, page_list) {
> > -		free_page((unsigned long)pos);
> > -		freed++;
> > +		free_page((unsigned long) bnode);
> >  	}
> >  
> > -	return freed;
> > +	return i;
> >  }
> >  
> >  /*
> > @@ -3314,10 +3345,8 @@ static void kfree_rcu_work(struct work_struct *work)
> >  			}
> >  			rcu_lock_release(&rcu_callback_map);
> >  
> > -			raw_spin_lock_irqsave(&krcp->lock, flags);
> >  			if (put_cached_bnode(krcp, bkvhead[i]))
> >  				bkvhead[i] = NULL;
> > -			raw_spin_unlock_irqrestore(&krcp->lock, flags);
> >  
> >  			if (bkvhead[i])
> >  				free_page((unsigned long) bkvhead[i]);
> > @@ -3460,7 +3489,6 @@ static void fill_page_cache_func(struct work_struct *work)
> >  	struct kfree_rcu_cpu *krcp =
> >  		container_of(work, struct kfree_rcu_cpu,
> >  			page_cache_work.work);
> > -	unsigned long flags;
> >  	bool pushed;
> >  	int i;
> >  
> > @@ -3469,10 +3497,7 @@ static void fill_page_cache_func(struct work_struct *work)
> >  			__get_free_page(GFP_KERNEL | __GFP_NORETRY |
> >  				__GFP_NOMEMALLOC | __GFP_NOWARN);
> >  
> >  		if (bnode) {
> > -			raw_spin_lock_irqsave(&krcp->lock, flags);
> >  			pushed = put_cached_bnode(krcp, bnode);
> > -			raw_spin_unlock_irqrestore(&krcp->lock, flags);
> > -
> >  			if (!pushed) {
> >  				free_page((unsigned long) bnode);
> >  				break;
> > @@ -3647,17 +3672,13 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
> >  {
> >  	int cpu;
> >  	unsigned long count = 0;
> > -	unsigned long flags;
> >  
> >  	/* Snapshot count of all CPUs */
> >  	for_each_possible_cpu(cpu) {
> >  		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
> >  
> >  		count += READ_ONCE(krcp->count);
> > -
> > -		raw_spin_lock_irqsave(&krcp->lock, flags);
> > -		count += krcp->nr_bkv_objs;
> > -		raw_spin_unlock_irqrestore(&krcp->lock, flags);
> > +		count += atomic_read(&krcp->nr_bkv_objs);
> >  	}
> >  
> >  	WRITE_ONCE(backoff_page_cache_fill, true);
> > --
> > 2.20.1
> >
> 
> 
> --
> Uladzislau Rezki
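
For readers skimming the archive, the core of the withdrawn patch is the bounded compare-and-exchange counter in atomic_test_inc_dec(), which increments nr_bkv_objs only while it is below rcu_min_cached_objs and decrements it only while it is above zero. Below is a minimal userspace sketch of that retry-loop pattern using C11 atomics; the bounded_inc_dec() name and the demo limits are made up for illustration, and the sketch does not reproduce the kernel code (or whatever made the posted version buggy).

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Increment *v only while it is below 'thresh' (inc == true), or
 * decrement it only while it is above 'thresh' (inc == false).
 * Retry the compare-and-exchange until either the update wins or
 * the bound forbids it.  Returns true on success.
 */
static bool bounded_inc_dec(atomic_uint *v, unsigned int thresh, bool inc)
{
        unsigned int cur = atomic_load(v);

        for (;;) {
                if (inc ? (cur >= thresh) : (cur <= thresh))
                        return false;

                /* On failure, 'cur' is refreshed with the current value. */
                if (atomic_compare_exchange_weak(v, &cur, inc ? cur + 1 : cur - 1))
                        return true;
        }
}

int main(void)
{
        atomic_uint nr_objs = 0;
        int i;

        /* Reserve up to 5 "cached pages"; the 6th attempt is refused. */
        for (i = 0; i < 6; i++)
                printf("inc %d: %s\n", i, bounded_inc_dec(&nr_objs, 5, true) ? "ok" : "full");

        /* Drain while the counter stays above 0. */
        while (bounded_inc_dec(&nr_objs, 0, false))
                printf("dec, %u left\n", atomic_load(&nr_objs));

        return 0;
}

In the patch itself this counter guards a lock-free llist, with the counter update and the llist_add()/llist_del_first() call performed as two separate steps rather than as a single atomic operation.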