From: Alexei Starovoitov <ast@xxxxxxxxxx> The next patch will introduce cross-cpu llist access and existing irq_work_sync() + drain_mem_cache() + rcu_barrier_tasks_trace() mechanism will not be enough, since irq_work_sync() + drain_mem_cache() on cpu A won't guarantee that llist on cpu A are empty. The free_bulk() on cpu B might add objects back to llist of cpu A. Add 'bool draining' flag and set it all cpus before proceeding with irq_work_sync. The modified sequence looks like: for_each_cpu: WRITE_ONCE(c->draining, true); // make RCU callback a nop irq_work_sync(); // wait for irq_work callback (free_bulk) to finish for_each_cpu: drain_mem_cache(); // free all objects rcu_barrier_tasks_trace(); // wait for RCU callbacks to execute as a nop Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx> --- kernel/bpf/memalloc.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 4fd79bd51f5a..d68a854f45ee 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -98,6 +98,7 @@ struct bpf_mem_cache { int free_cnt; int low_watermark, high_watermark, batch; int percpu_size; + bool draining; /* list of objects to be freed after RCU tasks trace GP */ struct llist_head free_by_rcu_ttrace; @@ -252,7 +253,10 @@ static void __free_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace); + if (unlikely(READ_ONCE(c->draining))) + goto out; free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); +out: atomic_set(&c->call_rcu_ttrace_in_progress, 0); } @@ -542,16 +546,11 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) rcu_in_progress = 0; for_each_possible_cpu(cpu) { c = per_cpu_ptr(ma->cache, cpu); - /* - * refill_work may be unfinished for PREEMPT_RT kernel - * in which irq work is invoked in a per-CPU RT thread. - * It is also possible for kernel with - * arch_irq_work_has_interrupt() being false and irq - * work is invoked in timer interrupt. So waiting for - * the completion of irq work to ease the handling of - * concurrency. - */ + WRITE_ONCE(c->draining, true); irq_work_sync(&c->refill_work); + } + for_each_possible_cpu(cpu) { + c = per_cpu_ptr(ma->cache, cpu); drain_mem_cache(c); rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); } @@ -566,7 +565,14 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) cc = per_cpu_ptr(ma->caches, cpu); for (i = 0; i < NUM_CACHES; i++) { c = &cc->cache[i]; + WRITE_ONCE(c->draining, true); irq_work_sync(&c->refill_work); + } + } + for_each_possible_cpu(cpu) { + cc = per_cpu_ptr(ma->caches, cpu); + for (i = 0; i < NUM_CACHES; i++) { + c = &cc->cache[i]; drain_mem_cache(c); rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); } -- 2.34.1