The bpf_perf_event_aux_pause helper is used to pause or resume the perf
AUX trace area. An example use case is attaching eBPF programs to Ftrace
tracepoints: when a tracepoint is hit, the associated eBPF program is
executed, and the program can invoke bpf_perf_event_aux_pause() to pause
or resume the AUX trace. This enables fine-grained tracing by combining
perf and eBPF.

This commit implements the bpf_perf_event_aux_pause helper and makes it
pass the eBPF verifier.

Signed-off-by: Leo Yan <leo.yan@xxxxxxx>
---
 include/uapi/linux/bpf.h | 21 ++++++++++++++++
 kernel/bpf/verifier.c    |  2 ++
 kernel/trace/bpf_trace.c | 52 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4162afc6b5d0..678278c91ce2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5795,6 +5795,26 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_perf_event_aux_pause(struct bpf_map *map, u64 flags, u32 pause)
+ *	Description
+ *		Pause or resume an AUX area trace associated with the perf event.
+ *
+ *		The *flags* argument is specified as the key value for
+ *		retrieving the event pointer from the passed *map*.
+ *
+ *		The *pause* argument controls pausing or resuming the AUX trace.
+ *		A non-zero value (true) pauses the AUX trace and a zero value
+ *		(false) re-enables it.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the event is not found in the events map.
+ *
+ *		**-E2BIG** if the event index passed in the *flags* parameter
+ *		is out of range of the map.
+ *
+ *		**-EINVAL** if *flags* contains an invalid value.
  */
 #define ___BPF_FUNC_MAPPER(FN, ctx...)			\
 	FN(unspec, 0, ##ctx)				\
@@ -6009,6 +6029,7 @@ union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx)		\
 	FN(cgrp_storage_get, 210, ##ctx)		\
 	FN(cgrp_storage_delete, 211, ##ctx)		\
+	FN(perf_event_aux_pause, 212, ##ctx)		\
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 09f7fa635f67..1f3acd8a7de3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9315,6 +9315,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_perf_event_output &&
 		    func_id != BPF_FUNC_skb_output &&
 		    func_id != BPF_FUNC_perf_event_read_value &&
+		    func_id != BPF_FUNC_perf_event_aux_pause &&
 		    func_id != BPF_FUNC_xdp_output)
 			goto error;
 		break;
@@ -9443,6 +9444,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_FUNC_perf_event_read:
 	case BPF_FUNC_perf_event_output:
 	case BPF_FUNC_perf_event_read_value:
+	case BPF_FUNC_perf_event_aux_pause:
 	case BPF_FUNC_skb_output:
 	case BPF_FUNC_xdp_output:
 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 949a3870946c..a3b857f6cab4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -617,6 +617,56 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
 	.arg4_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_3(bpf_perf_event_aux_pause, struct bpf_map *, map, u64, flags,
+	   u32, pause)
+{
+	unsigned long irq_flags;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	unsigned int cpu = smp_processor_id();
+	u64 index = flags & BPF_F_INDEX_MASK;
+	struct bpf_event_entry *ee;
+	int ret = 0;
+
+	/*
+	 * Disabling interrupts avoids scheduling and races with the perf
+	 * event enable and disable flows.
+	 */
+	local_irq_save(irq_flags);
+
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (index == BPF_F_CURRENT_CPU)
+		index = cpu;
+	if (unlikely(index >= array->map.max_entries)) {
+		ret = -E2BIG;
+		goto out;
+	}
+
+	ee = READ_ONCE(array->ptrs[index]);
+	if (!ee) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	perf_event_aux_pause(ee->event, pause);
+
+out:
+	local_irq_restore(irq_flags);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_perf_event_aux_pause_proto = {
+	.func		= bpf_perf_event_aux_pause,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 			u64 flags, struct perf_sample_data *sd)
@@ -1565,6 +1615,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_find_vma_proto;
 	case BPF_FUNC_trace_vprintk:
 		return bpf_get_trace_vprintk_proto();
+	case BPF_FUNC_perf_event_aux_pause:
+		return &bpf_perf_event_aux_pause_proto;
 	default:
 		return bpf_base_func_proto(func_id, prog);
 	}
-- 
2.34.1