Recently the sched_switch tracepoint gained a new prev_state argument, but it's hard to handle that change inside a BPF program. Instead, we can check the tracepoint's function prototype in BTF before loading the program. Thus I set a read-only flag (has_prev_state) based on the BTF info, and the tracepoint handler uses it to pick the matching argument layout. Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx> --- tools/perf/util/bpf_off_cpu.c | 28 +++++++++++++++ tools/perf/util/bpf_skel/off_cpu.bpf.c | 48 ++++++++++++++++++++------ 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 89f36229041d..31343db68ed3 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -86,6 +86,33 @@ static void off_cpu_finish(void *arg __maybe_unused) off_cpu_bpf__destroy(skel); } +/* recent kernels added a prev_state arg, so tell the BPF program which argument layout to expect */ +static void check_sched_switch_args(void) +{ + const struct btf *btf = bpf_object__btf(skel->obj); + const struct btf_type *t1, *t2, *t3; + u32 type_id; + + type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch", + BTF_KIND_TYPEDEF); + if ((s32)type_id < 0) + return; + + t1 = btf__type_by_id(btf, type_id); + if (t1 == NULL) + return; + + t2 = btf__type_by_id(btf, t1->type); + if (t2 == NULL || !btf_is_ptr(t2)) + return; + + t3 = btf__type_by_id(btf, t2->type); + if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) { + /* new format: pass prev_state as 2nd arg */ + skel->rodata->has_prev_state = true; + } +} + int off_cpu_prepare(struct evlist *evlist, struct target *target) { int err, fd, i; @@ -114,6 +141,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target) } set_max_rlimit(); + check_sched_switch_args(); err = off_cpu_bpf__load(skel); if (err) { diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index c35106b9e20b..98eaba95924f 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c 
@@ -72,6 +72,8 @@ int enabled = 0; int has_cpu = 0; int has_task = 0; +const volatile bool has_prev_state = false; + /* * Old kernel used to call it task_struct->state and now it's '__state'. * Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -121,22 +123,13 @@ static inline int can_record(struct task_struct *t, int state) return 1; } -SEC("tp_btf/sched_switch") -int on_switch(u64 *ctx) +static int off_cpu_stat(u64 *ctx, struct task_struct *prev, + struct task_struct *next, int state) { __u64 ts; - int state; __u32 stack_id; - struct task_struct *prev, *next; struct tstamp_data *pelem; - if (!enabled) - return 0; - - prev = (struct task_struct *)ctx[1]; - next = (struct task_struct *)ctx[2]; - state = get_task_state(prev); - ts = bpf_ktime_get_ns(); if (!can_record(prev, state)) @@ -180,4 +173,37 @@ int on_switch(u64 *ctx) return 0; } +SEC("tp_btf/sched_switch") +int on_switch(u64 *ctx) +{ + struct task_struct *prev, *next; + int prev_state; + + if (!enabled) + return 0; + + /* + * For v5.18+: + * TP_PROTO(bool preempt, int prev_state, + * struct task_struct *prev, + * struct task_struct *next) + * + * On older kernels: + * TP_PROTO(bool preempt, struct task_struct *prev, + * struct task_struct *next) + */ + if (has_prev_state) { + prev = (struct task_struct *)ctx[2]; + next = (struct task_struct *)ctx[3]; + prev_state = (int)ctx[1]; + } else { + prev = (struct task_struct *)ctx[1]; + next = (struct task_struct *)ctx[2]; + + prev_state = get_task_state(prev); + } + + return off_cpu_stat(ctx, prev, next, prev_state); +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; -- 2.36.0.512.ge40c2bad7a-goog