From: Feng Zhou <zhoufeng.zf@xxxxxxxxxxxxx>

This patch uses head->first in pcpu_freelist_head to check whether the
freelist has any free entries before taking the lock. If it does, grab
the spin_lock; otherwise, move on to the next CPU's freelist.

Before the patch:

hash_map performance
./map_perf_test 1
0:hash_map_perf pre-alloc 1043397 events per sec
...
The average of the test results is around 1050000 events per sec.

hash_map worst case: no free elements
./run_bench_bpf_hashmap_full_update.sh
Setting up benchmark 'bpf-hashmap-ful-update'...
Benchmark 'bpf-hashmap-ful-update' started.
1:hash_map_full_perf 15687 events per sec
...
The average of the test results is around 16000 events per sec.

ftrace trace:

 0)               |  htab_map_update_elem() {
 0)               |    __pcpu_freelist_pop() {
 0)               |      _raw_spin_lock()
 0)               |      _raw_spin_unlock()
 0)               |      ...
 0) + 25.188 us   |    }
 0) + 28.439 us   |  }

The test machine has 16 CPUs, so in the worst case a pop tries to take
the spin_lock 17 times: once for each of the 16 per-CPU lists, plus once
for the extralist.

After the patch:

hash_map performance
./map_perf_test 1
0:hash_map_perf pre-alloc 1053298 events per sec
...
The average of the test results is around 1050000 events per sec.

hash_map worst case: no free elements
./run_bench_bpf_hashmap_full_update.sh
Setting up benchmark 'bpf-hashmap-ful-update'...
Benchmark 'bpf-hashmap-ful-update' started.
1:hash_map_full_perf 555830 events per sec
...
The average of the test results is around 550000 events per sec.

ftrace trace:

 0)               |  htab_map_update_elem() {
 0)               |    alloc_htab_elem() {
 0)   0.586 us    |      __pcpu_freelist_pop();
 0)   0.945 us    |    }
 0) + 8.669 us    |  }

As the results show, with this patch the map performance in the normal
case is essentially unchanged, and when no elements are free, head->first
is checked first instead of unconditionally acquiring the spin_lock.
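For illustration only (not part of the patch): a simplified sketch of the
check-before-lock fast path applied to a single per-CPU list. The helper
name try_pop_head is made up for this example; the struct fields and the
READ_ONCE()/WRITE_ONCE() pairing follow the diff below.

	/* Illustrative sketch, not taken verbatim from the patch. */
	static struct pcpu_freelist_node *
	try_pop_head(struct pcpu_freelist_head *head)
	{
		struct pcpu_freelist_node *node;

		/* Lockless peek: skip the lock when the list looks empty. */
		if (!READ_ONCE(head->first))
			return NULL;

		raw_spin_lock(&head->lock);
		node = head->first;
		if (node)
			/* Pairs with the lockless READ_ONCE() peek above. */
			WRITE_ONCE(head->first, node->next);
		raw_spin_unlock(&head->lock);
		return node;
	}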
Co-developed-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx>
Signed-off-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx>
Signed-off-by: Feng Zhou <zhoufeng.zf@xxxxxxxxxxxxx>
---
 kernel/bpf/percpu_freelist.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 3d897de89061..00b874c8e889 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -31,7 +31,7 @@ static inline void pcpu_freelist_push_node(struct pcpu_freelist_head *head,
 					   struct pcpu_freelist_node *node)
 {
 	node->next = head->first;
-	head->first = node;
+	WRITE_ONCE(head->first, node);
 }
 
 static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
@@ -130,14 +130,17 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
 	orig_cpu = cpu = raw_smp_processor_id();
 	while (1) {
 		head = per_cpu_ptr(s->freelist, cpu);
+		if (!READ_ONCE(head->first))
+			goto next_cpu;
 		raw_spin_lock(&head->lock);
 		node = head->first;
 		if (node) {
-			head->first = node->next;
+			WRITE_ONCE(head->first, node->next);
 			raw_spin_unlock(&head->lock);
 			return node;
 		}
 		raw_spin_unlock(&head->lock);
+next_cpu:
 		cpu = cpumask_next(cpu, cpu_possible_mask);
 		if (cpu >= nr_cpu_ids)
 			cpu = 0;
@@ -146,10 +149,12 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
 	}
 
 	/* per cpu lists are all empty, try extralist */
+	if (!READ_ONCE(s->extralist.first))
+		return NULL;
 	raw_spin_lock(&s->extralist.lock);
 	node = s->extralist.first;
 	if (node)
-		s->extralist.first = node->next;
+		WRITE_ONCE(s->extralist.first, node->next);
 	raw_spin_unlock(&s->extralist.lock);
 	return node;
 }
@@ -164,15 +169,18 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
 	orig_cpu = cpu = raw_smp_processor_id();
 	while (1) {
 		head = per_cpu_ptr(s->freelist, cpu);
+		if (!READ_ONCE(head->first))
+			goto next_cpu;
 		if (raw_spin_trylock(&head->lock)) {
 			node = head->first;
 			if (node) {
-				head->first = node->next;
+				WRITE_ONCE(head->first, node->next);
 				raw_spin_unlock(&head->lock);
 				return node;
 			}
 			raw_spin_unlock(&head->lock);
 		}
+next_cpu:
 		cpu = cpumask_next(cpu, cpu_possible_mask);
 		if (cpu >= nr_cpu_ids)
 			cpu = 0;
@@ -181,11 +189,11 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
 	}
 
 	/* cannot pop from per cpu lists, try extralist */
-	if (!raw_spin_trylock(&s->extralist.lock))
+	if (!READ_ONCE(s->extralist.first) || !raw_spin_trylock(&s->extralist.lock))
 		return NULL;
 	node = s->extralist.first;
 	if (node)
-		s->extralist.first = node->next;
+		WRITE_ONCE(s->extralist.first, node->next);
 	raw_spin_unlock(&s->extralist.lock);
 	return node;
 }
-- 
2.20.1