From: Björn Töpel <bjorn.topel@xxxxxxxxx> After the RCU flavor consolidation [1], call_rcu() and synchronize_rcu() waits for preempt-disable regions (NAPI) in addition to the read-side critical sections. As a result of this, the cleanup code in cpumap can be simplified * There is no longer a need to flush in __cpu_map_entry_free, since we know that this has been done when the call_rcu() callback is triggered. * When freeing the map, there is no need to explicitly wait for a flush. It's guaranteed to be done after the synchronize_rcu() call in cpu_map_free(). [1] https://lwn.net/Articles/777036/ Signed-off-by: Björn Töpel <bjorn.topel@xxxxxxxxx> --- kernel/bpf/cpumap.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index ef49e17ae47c..fbf176e0a2ab 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -75,7 +75,7 @@ struct bpf_cpu_map { struct list_head __percpu *flush_list; }; -static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx); +static int bq_flush_to_queue(struct xdp_bulk_queue *bq); static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) { @@ -399,7 +399,6 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, static void __cpu_map_entry_free(struct rcu_head *rcu) { struct bpf_cpu_map_entry *rcpu; - int cpu; /* This cpu_map_entry have been disconnected from map and one * RCU graze-period have elapsed. Thus, XDP cannot queue any @@ -408,13 +407,6 @@ static void __cpu_map_entry_free(struct rcu_head *rcu) */ rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu); - /* Flush remaining packets in percpu bulkq */ - for_each_online_cpu(cpu) { - struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu); - - /* No concurrent bq_enqueue can run at this point */ - bq_flush_to_queue(bq, false); - } free_percpu(rcpu->bulkq); /* Cannot kthread_stop() here, last put free rcpu resources */ put_cpu_map_entry(rcpu); @@ -522,18 +514,6 @@ static void cpu_map_free(struct bpf_map *map) bpf_clear_redirect_map(map); synchronize_rcu(); - /* To ensure all pending flush operations have completed wait for flush - * list be empty on _all_ cpus. Because the above synchronize_rcu() - * ensures the map is disconnected from the program we can assume no new - * items will be added to the list. - */ - for_each_online_cpu(cpu) { - struct list_head *flush_list = per_cpu_ptr(cmap->flush_list, cpu); - - while (!list_empty(flush_list)) - cond_resched(); - } - /* For cpu_map the remote CPUs can still be using the entries * (struct bpf_cpu_map_entry). */ @@ -599,7 +579,7 @@ const struct bpf_map_ops cpu_map_ops = { .map_check_btf = map_check_no_btf, }; -static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx) +static int bq_flush_to_queue(struct xdp_bulk_queue *bq) { struct bpf_cpu_map_entry *rcpu = bq->obj; unsigned int processed = 0, drops = 0; @@ -620,10 +600,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx) err = __ptr_ring_produce(q, xdpf); if (err) { drops++; - if (likely(in_napi_ctx)) - xdp_return_frame_rx_napi(xdpf); - else - xdp_return_frame(xdpf); + xdp_return_frame_rx_napi(xdpf); } processed++; } @@ -646,7 +623,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) - bq_flush_to_queue(bq, true); + bq_flush_to_queue(bq); /* Notice, xdp_buff/page MUST be queued here, long enough for * driver to code invoking us to finished, due to driver @@ -688,7 +665,7 @@ void __cpu_map_flush(struct bpf_map *map) struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { - bq_flush_to_queue(bq, true); + bq_flush_to_queue(bq); /* If already running, costs spin_lock_irqsave + smb_mb */ wake_up_process(bq->obj->kthread); -- 2.20.1