On Fri, May 28, 2021 at 4:53 PM Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> wrote:
>
> This is done to later reuse these in a way that can be shared
> among multiple samples.
>
> We are using xdp_redirect_cpu_kern.c as a base to build further support on
> top (mostly adding a few other things missing that xdp_monitor does in
> subsequent patches).
>
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
> ---
>  samples/bpf/xdp_sample_kern.h | 220 ++++++++++++++++++++++++++++++++++
>  1 file changed, 220 insertions(+)
>  create mode 100644 samples/bpf/xdp_sample_kern.h
>
> diff --git a/samples/bpf/xdp_sample_kern.h b/samples/bpf/xdp_sample_kern.h

Instead of doing it as a header, can you please use BPF static linking? I
think that's a better approach and a good showcase for anyone who would like
to use static linking for their BPF programs.

> new file mode 100644
> index 000000000000..bb809542ac20
> --- /dev/null
> +++ b/samples/bpf/xdp_sample_kern.h
> @@ -0,0 +1,220 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
> +#pragma once
> +
> +#include <uapi/linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define MAX_CPUS 64
> +
> +/* Common stats data record to keep userspace more simple */
> +struct datarec {
> +	__u64 processed;
> +	__u64 dropped;
> +	__u64 issue;
> +	__u64 xdp_pass;
> +	__u64 xdp_drop;
> +	__u64 xdp_redirect;
> +};
> +
> +/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
> + * feedback. Redirect TX errors can be caught via a tracepoint.
> + */
> +struct {
> +	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +	__type(key, u32);
> +	__type(value, struct datarec);
> +	__uint(max_entries, 1);
> +} rx_cnt SEC(".maps");
> +
> +/* Used by trace point */
> +struct {
> +	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +	__type(key, u32);
> +	__type(value, struct datarec);
> +	__uint(max_entries, 2);
> +	/* TODO: have entries for all possible errno's */
> +} redirect_err_cnt SEC(".maps");
> +
> +/* Used by trace point */
> +struct {
> +	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +	__type(key, u32);
> +	__type(value, struct datarec);
> +	__uint(max_entries, MAX_CPUS);
> +} cpumap_enqueue_cnt SEC(".maps");

One way to squeeze a bit more performance would be to use global variables
instead of maps:

struct datarec cpu_map_enqueue_cnts[MAX_CPUS][MAX_CPUS];

and the other PERCPU_ARRAY maps could be just one-dimensional arrays. You'd
need to ensure each value sits on its own cache line, of course (a rough
sketch below).

> +
> +/* Used by trace point */
> +struct {
> +	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +	__type(key, u32);
> +	__type(value, struct datarec);
> +	__uint(max_entries, 1);
> +} cpumap_kthread_cnt SEC(".maps");
> +

[...]
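To make the global-variable idea above a bit more concrete, here is a rough,
untested sketch of how the enqueue counters could look, reusing struct
datarec, MAX_CPUS and the ctx struct from the patch (the datarec_cl wrapper
name and the 64-byte alignment are just illustrative):

struct datarec_cl {
	struct datarec d;
} __attribute__((aligned(64)));	/* keep each slot on its own cache line */

/* Indexed by [to_cpu][current CPU]; lives in the .bss global-data map */
struct datarec_cl cpu_map_enqueue_cnts[MAX_CPUS][MAX_CPUS];

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 cpu = bpf_get_smp_processor_id();
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (cpu >= MAX_CPUS || to_cpu >= MAX_CPUS)
		return 0;

	rec = &cpu_map_enqueue_cnts[to_cpu][cpu].d;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;
	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;
	return 0;
}

The second array dimension stands in for the per-CPU semantics of
PERCPU_ARRAY, so user space would sum over it when reading the counters.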
> +
> +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
> + * Code in: kernel/include/trace/events/xdp.h
> + */
> +struct cpumap_enqueue_ctx {
> +	u64 __pad;		// First 8 bytes are not accessible by bpf code
> +	int map_id;		// offset:8; size:4; signed:1;
> +	u32 act;		// offset:12; size:4; signed:0;
> +	int cpu;		// offset:16; size:4; signed:1;
> +	unsigned int drops;	// offset:20; size:4; signed:0;
> +	unsigned int processed;	// offset:24; size:4; signed:0;
> +	int to_cpu;		// offset:28; size:4; signed:1;
> +};

If you used vmlinux.h, this is already in there as struct
trace_event_raw_xdp_cpumap_enqueue; similarly for the other tracepoints.

> +
> +SEC("tracepoint/xdp/xdp_cpumap_enqueue")
> +int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
> +{
> +	u32 to_cpu = ctx->to_cpu;
> +	struct datarec *rec;
> +
> +	if (to_cpu >= MAX_CPUS)
> +		return 1;
> +
> +	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
> +	if (!rec)
> +		return 0;
> +	rec->processed += ctx->processed;
> +	rec->dropped += ctx->drops;
> +
> +	/* Record bulk events, then userspace can calc average bulk size */
> +	if (ctx->processed > 0)
> +		rec->issue += 1;
> +
> +	/* Inception: It's possible to detect overload situations, via
> +	 * this tracepoint. This can be used for creating a feedback
> +	 * loop to XDP, which can take appropriate actions to mitigate
> +	 * this overload situation.
> +	 */
> +	return 0;
> +}
> +

[...]
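Just to illustrate the vmlinux.h point, an untested sketch of what the
program above could look like against the BTF-generated type, so the
hand-maintained offsets can't go stale:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct trace_event_raw_xdp_cpumap_enqueue *ctx)
{
	u32 to_cpu = ctx->to_cpu;

	if (to_cpu >= MAX_CPUS)
		return 1;
	/* ... same counter updates as above ... */
	return 0;
}

Note that vmlinux.h can't be mixed with <uapi/linux/bpf.h>, so switching
over would also mean dropping the uapi include at the top of the header.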