This is done to later reuse these in a way that can be shared among multiple samples. We are using xdp_redirect_cpu_kern.c as a base to build further support on top (mostly adding a few other things missing that xdp_monitor does in subsequent patches). Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> --- samples/bpf/xdp_sample_kern.h | 220 ++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 samples/bpf/xdp_sample_kern.h diff --git a/samples/bpf/xdp_sample_kern.h b/samples/bpf/xdp_sample_kern.h new file mode 100644 index 000000000000..bb809542ac20 --- /dev/null +++ b/samples/bpf/xdp_sample_kern.h @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 +/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ +#pragma once + +#include <uapi/linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define MAX_CPUS 64 + +/* Common stats data record to keep userspace more simple */ +struct datarec { + __u64 processed; + __u64 dropped; + __u64 issue; + __u64 xdp_pass; + __u64 xdp_drop; + __u64 xdp_redirect; +}; + +/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success + * feedback. Redirect TX errors can be caught via a tracepoint. + */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, 1); +} rx_cnt SEC(".maps"); + +/* Used by trace point */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, 2); + /* TODO: have entries for all possible errno's */ +} redirect_err_cnt SEC(".maps"); + +/* Used by trace point */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, MAX_CPUS); +} cpumap_enqueue_cnt SEC(".maps"); + +/* Used by trace point */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, 1); +} cpumap_kthread_cnt SEC(".maps"); + +/* Used by trace point */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, 1); +} exception_cnt SEC(".maps"); + +/*** Trace point code ***/ + +/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct xdp_redirect_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int prog_id; // offset:8; size:4; signed:1; + u32 act; // offset:12 size:4; signed:0; + int ifindex; // offset:16 size:4; signed:1; + int err; // offset:20 size:4; signed:1; + int to_ifindex; // offset:24 size:4; signed:1; + u32 map_id; // offset:28 size:4; signed:0; + int map_index; // offset:32 size:4; signed:1; +}; // offset:36 + +enum { + XDP_REDIRECT_SUCCESS = 0, + XDP_REDIRECT_ERROR = 1 +}; + +static __always_inline +int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) +{ + u32 key = XDP_REDIRECT_ERROR; + struct datarec *rec; + int err = ctx->err; + + if (!err) + key = XDP_REDIRECT_SUCCESS; + + rec = bpf_map_lookup_elem(&redirect_err_cnt, &key); + if (!rec) + return 0; + rec->dropped += 1; + + return 0; /* Indicate event was filtered (no further processing)*/ + /* + * Returning 1 here would allow e.g. a perf-record tracepoint + * to see and record these events, but it doesn't work well + * in-practice as stopping perf-record also unload this + * bpf_prog. Plus, there is additional overhead of doing so. + */ +} + +SEC("tracepoint/xdp/xdp_redirect_err") +int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} + +SEC("tracepoint/xdp/xdp_redirect_map_err") +int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} + +/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct xdp_exception_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int prog_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int ifindex; // offset:16; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_exception") +int trace_xdp_exception(struct xdp_exception_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&exception_cnt, &key); + if (!rec) + return 1; + rec->dropped += 1; + + return 0; +} + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_enqueue_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int to_cpu; // offset:28; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_cpumap_enqueue") +int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) +{ + u32 to_cpu = ctx->to_cpu; + struct datarec *rec; + + if (to_cpu >= MAX_CPUS) + return 1; + + rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + + /* Record bulk events, then userspace can calc average bulk size */ + if (ctx->processed > 0) + rec->issue += 1; + + /* Inception: It's possible to detect overload situations, via + * this tracepoint. This can be used for creating a feedback + * loop to XDP, which can take appropriate actions to mitigate + * this overload situation. + */ + return 0; +} + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_kthread_ctx { + u64 __pad; // First 8 bytes are not accessible + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int sched; // offset:28; size:4; signed:1; + unsigned int xdp_pass; // offset:32; size:4; signed:0; + unsigned int xdp_drop; // offset:36; size:4; signed:0; + unsigned int xdp_redirect; // offset:40; size:4; signed:0; +}; + +SEC("tracepoint/xdp/xdp_cpumap_kthread") +int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + rec->xdp_pass += ctx->xdp_pass; + rec->xdp_drop += ctx->xdp_drop; + rec->xdp_redirect += ctx->xdp_redirect; + + /* Count times kthread yielded CPU via schedule call */ + if (ctx->sched) + rec->issue++; + + return 0; +} -- 2.31.1