Re: [PATCH bpf-next] bpf: Restrict attachment of bpf program to some tracepoints

> On 7 Dec 2022, at 10:14 AM, Namhyung Kim <namhyung@xxxxxxxxx> wrote:
> 
> On Tue, Dec 06, 2022 at 12:09:51PM -0800, Alexei Starovoitov wrote:
>> On Mon, Dec 5, 2022 at 4:28 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
>>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>>> index 3bbd3f0c810c..d27b7dc77894 100644
>>> --- a/kernel/trace/bpf_trace.c
>>> +++ b/kernel/trace/bpf_trace.c
>>> @@ -2252,9 +2252,8 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
>>> }
>>> 
>>> static __always_inline
>>> -void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>>> +void __bpf_trace_prog_run(struct bpf_prog *prog, u64 *args)
>>> {
>>> -       cant_sleep();
>>>        if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
>>>                bpf_prog_inc_misses_counter(prog);
>>>                goto out;
>>> @@ -2266,6 +2265,22 @@ void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>>>        this_cpu_dec(*(prog->active));
>>> }
>>> 
>>> +static __always_inline
>>> +void __bpf_trace_run(struct bpf_raw_event_data *data, u64 *args)
>>> +{
>>> +       struct bpf_prog *prog = data->prog;
>>> +
>>> +       cant_sleep();
>>> +       if (unlikely(!data->recursion))
>>> +               return __bpf_trace_prog_run(prog, args);
>>> +
>>> +       if (unlikely(this_cpu_inc_return(*(data->recursion)) != 1))
>>> +               goto out;
>>> +       __bpf_trace_prog_run(prog, args);
>>> +out:
>>> +       this_cpu_dec(*(data->recursion));
>>> +}
>> 
>> This is way too much run-time and memory overhead to address
>> this corner case. Pls come up with some other approach.
>> Sorry I don't have decent suggestions at the moment.
>> For now we can simply disallow attaching to contention_begin.
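For reference, the stopgap Alexei suggests amounts to a deny list consulted at
raw tracepoint attach time. A minimal sketch, assuming a helper of roughly this
shape (the name, list, and error code are illustrative, not from this thread):

static bool bpf_raw_tp_denied(const char *tp_name)
{
        /* tracepoints where a BPF program can recurse via lock contention */
        static const char * const denied[] = { "contention_begin" };
        int i;

        for (i = 0; i < ARRAY_SIZE(denied); i++) {
                if (!strcmp(tp_name, denied[i]))
                        return true;
        }
        return false;
}

bpf_raw_tp_link_attach() would then bail out with something like -EPERM before
ever calling bpf_probe_register().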
>> 
> 
> How about this?  It seems to work for me.

How about progs that are attached via kprobe?
See this one:
https://lore.kernel.org/bpf/CACkBjsb3GRw5aiTT=RCUs3H5aum_QN+B0ZqZA=MvjspUP6NFMg@xxxxxxxxxxxxxx/T/#u
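The point being: the guard below is wired up only on the raw tracepoint attach
path, so a kprobe-attached program that takes the same locks never passes
through it. A minimal sketch of such a program (attach point and map are
hypothetical):

/* kprobe program that can itself contend on a hash map bucket lock */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 1024);
        __type(key, u64);
        __type(value, u64);
} counts SEC(".maps");

SEC("kprobe/some_traced_function")      /* hypothetical attach point */
int BPF_KPROBE(on_kprobe)
{
        u64 key = bpf_get_current_pid_tgid();
        u64 one = 1;

        /* may take a bucket spinlock; under contention this can fire
         * contention_begin without going through any norecurse wrapper */
        bpf_map_update_elem(&counts, &key, &one, BPF_ANY);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";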

> 
> Thanks,
> Namhyung
> 
> ---
> include/linux/trace_events.h    | 14 +++++++
> include/linux/tracepoint-defs.h |  5 +++
> kernel/bpf/syscall.c            | 18 ++++++++-
> kernel/trace/bpf_trace.c        | 65 ++++++++++++++++++++++++++++++---
> 4 files changed, 95 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 20749bd9db71..461468210a77 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -742,6 +742,10 @@ void perf_event_detach_bpf_prog(struct perf_event *event);
> int perf_event_query_prog_array(struct perf_event *event, void __user *info);
> int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
> int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
> +int bpf_probe_register_norecurse(struct bpf_raw_event_map *btp, struct bpf_prog *prog,
> +                                 struct bpf_raw_event_data *data);
> +int bpf_probe_unregister_norecurse(struct bpf_raw_event_map *btp,
> +                                   struct bpf_raw_event_data *data);
> struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
> void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
> int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
> @@ -775,6 +779,16 @@ static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf
> {
>         return -EOPNOTSUPP;
> }
> +static inline int bpf_probe_register_norecurse(struct bpf_raw_event_map *btp, struct bpf_prog *p,
> +                                               struct bpf_raw_event_data *data)
> +{
> +        return -EOPNOTSUPP;
> +}
> +static inline int bpf_probe_unregister_norecurse(struct bpf_raw_event_map *btp,
> +                                                 struct bpf_raw_event_data *data)
> +{
> +        return -EOPNOTSUPP;
> +}
> static inline struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
> {
>         return NULL;
> diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
> index e7c2276be33e..e5adfe606888 100644
> --- a/include/linux/tracepoint-defs.h
> +++ b/include/linux/tracepoint-defs.h
> @@ -53,6 +53,11 @@ struct bpf_raw_event_map {
>         u32 writable_size;
> } __aligned(32);
> 
> +struct bpf_raw_event_data {
> +        struct bpf_prog *prog;
> +        int __percpu *active;
> +};
> +
> /*
>  * If a tracepoint needs to be called from a header file, it is not
>  * recommended to call it directly, as tracepoints in header files
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 35972afb6850..a8be9c443306 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3144,14 +3144,24 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
> struct bpf_raw_tp_link {
>         struct bpf_link link;
>         struct bpf_raw_event_map *btp;
> +        struct bpf_raw_event_data data;
> };
> 
> +static bool needs_recursion_check(struct bpf_raw_event_map *btp)
> +{
> +        return !strcmp(btp->tp->name, "contention_begin");
> +}
> +
> static void bpf_raw_tp_link_release(struct bpf_link *link)
> {
>         struct bpf_raw_tp_link *raw_tp =
>                 container_of(link, struct bpf_raw_tp_link, link);
> 
> -        bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
> +        if (needs_recursion_check(raw_tp->btp))
> +                bpf_probe_unregister_norecurse(raw_tp->btp, &raw_tp->data);
> +        else
> +                bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
> +
>         bpf_put_raw_tracepoint(raw_tp->btp);
> }
> 
> @@ -3348,7 +3358,11 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
>                 goto out_put_btp;
>         }
> 
> -        err = bpf_probe_register(link->btp, prog);
> +        if (needs_recursion_check(link->btp))
> +                err = bpf_probe_register_norecurse(link->btp, prog, &link->data);
> +        else
> +                err = bpf_probe_register(link->btp, prog);
> +
>         if (err) {
>                 bpf_link_cleanup(&link_primer);
>                 goto out_put_btp;
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c810c..edbfeff029aa 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2297,7 +2297,20 @@ void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>                 REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
>                 __bpf_trace_run(prog, args); \
>         } \
> -        EXPORT_SYMBOL_GPL(bpf_trace_run##x)
> +        EXPORT_SYMBOL_GPL(bpf_trace_run##x); \
> + \
> +        static void bpf_trace_run_norecurse##x(struct bpf_raw_event_data *data, \
> +                                               REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
> +        { \
> +                u64 args[x]; \
> +                if (unlikely(this_cpu_inc_return(*(data->active)) != 1)) \
> +                        goto out; \
> +                REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
> +                __bpf_trace_run(data->prog, args); \
> +        out: \
> +                this_cpu_dec(*(data->active)); \
> +        }
> +
> BPF_TRACE_DEFN_x(1);
> BPF_TRACE_DEFN_x(2);
> BPF_TRACE_DEFN_x(3);
> @@ -2311,7 +2324,23 @@ BPF_TRACE_DEFN_x(10);
> BPF_TRACE_DEFN_x(11);
> BPF_TRACE_DEFN_x(12);
> 
> -static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
> +static void *bpf_trace_norecurse_funcs[12] = {
> +        (void *)bpf_trace_run_norecurse1,
> +        (void *)bpf_trace_run_norecurse2,
> +        (void *)bpf_trace_run_norecurse3,
> +        (void *)bpf_trace_run_norecurse4,
> +        (void *)bpf_trace_run_norecurse5,
> +        (void *)bpf_trace_run_norecurse6,
> +        (void *)bpf_trace_run_norecurse7,
> +        (void *)bpf_trace_run_norecurse8,
> +        (void *)bpf_trace_run_norecurse9,
> +        (void *)bpf_trace_run_norecurse10,
> +        (void *)bpf_trace_run_norecurse11,
> +        (void *)bpf_trace_run_norecurse12,
> +};
> +
> +static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog,
> +                                void *func, void *data)
> {
>         struct tracepoint *tp = btp->tp;
> 
> @@ -2325,13 +2354,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
>         if (prog->aux->max_tp_access > btp->writable_size)
>                 return -EINVAL;
> 
> -        return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
> -                                                   prog);
> +        return tracepoint_probe_register_may_exist(tp, func, data);
> }
> 
> int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
> {
> -        return __bpf_probe_register(btp, prog);
> +        return __bpf_probe_register(btp, prog, btp->bpf_func, prog);
> }
> 
> int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
> @@ -2339,6 +2367,33 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
>         return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
> }
> 
> +int bpf_probe_register_norecurse(struct bpf_raw_event_map *btp, struct bpf_prog *prog,
> +                                 struct bpf_raw_event_data *data)
> +{
> +        void *bpf_func;
> +
> +        data->active = alloc_percpu_gfp(int, GFP_KERNEL);
> +        if (!data->active)
> +                return -ENOMEM;
> +
> +        data->prog = prog;
> +        bpf_func = bpf_trace_norecurse_funcs[btp->num_args - 1];
> +        return __bpf_probe_register(btp, prog, bpf_func, data);
> +}
> +
> +int bpf_probe_unregister_norecurse(struct bpf_raw_event_map *btp,
> +                                   struct bpf_raw_event_data *data)
> +{
> +        int err;
> +        void *bpf_func;
> +
> +        bpf_func = bpf_trace_norecurse_funcs[btp->num_args - 1];
> +        err = tracepoint_probe_unregister(btp->tp, bpf_func, data);
> +        free_percpu(data->active);
> +
> +        return err;
> +}
> +
> int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
>                             u32 *fd_type, const char **buf,
>                             u64 *probe_offset, u64 *probe_addr)
> -- 
> 2.39.0.rc0.267.gcb52ba06e7-goog
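For completeness, the patch above keeps the user-visible attach path unchanged,
so existing loaders keep working. A minimal libbpf sketch (object and program
names are illustrative):

/* attach a program to the contention_begin raw tracepoint */
#include <bpf/libbpf.h>
#include <stdio.h>

int main(void)
{
        struct bpf_object *obj;
        struct bpf_program *prog;
        struct bpf_link *link;

        obj = bpf_object__open_file("lock_prof.bpf.o", NULL);
        if (!obj || bpf_object__load(obj))
                return 1;

        prog = bpf_object__find_program_by_name(obj, "on_contention");
        if (!prog)
                return 1;

        /* ends up in bpf_raw_tp_link_attach(), i.e. the path patched above */
        link = bpf_program__attach_raw_tracepoint(prog, "contention_begin");
        if (!link) {
                fprintf(stderr, "attach failed\n");
                return 1;
        }

        /* ... run the workload ... */
        bpf_link__destroy(link);
        bpf_object__close(obj);
        return 0;
}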