Re: [PATCH bpf-next] bpf: Restrict attachment of bpf program to some tracepoints

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> On 7 Dec 2022, at 9:39 PM, Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
> 
> On Sun, Dec 04, 2022 at 10:44:52PM +0100, Jiri Olsa wrote:
>> On Wed, Nov 30, 2022 at 03:29:39PM -0800, Andrii Nakryiko wrote:
>>> On Fri, Nov 25, 2022 at 1:35 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
>>>> 
>>>> On Thu, Nov 24, 2022 at 09:17:22AM -0800, Alexei Starovoitov wrote:
>>>>> On Thu, Nov 24, 2022 at 1:42 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
>>>>>> 
>>>>>> On Thu, Nov 24, 2022 at 01:41:23AM +0100, Daniel Borkmann wrote:
>>>>>>> On 11/21/22 10:31 PM, Jiri Olsa wrote:
>>>>>>>> We hit following issues [1] [2] when we attach bpf program that calls
>>>>>>>> bpf_trace_printk helper to the contention_begin tracepoint.
>>>>>>>> 
>>>>>>>> As described in [3], multiple bpf programs that call the bpf_trace_printk
>>>>>>>> helper attached to contention_begin might result in exhaustion of the
>>>>>>>> printk buffer or cause a deadlock [2].
>>>>>>>> 
>>>>>>>> There's also another possible deadlock when multiple bpf programs attach
>>>>>>>> to bpf_trace_printk tracepoint and call one of the printk bpf helpers.
>>>>>>>> 
>>>>>>>> This change denies the attachment of bpf program to contention_begin
>>>>>>>> and bpf_trace_printk tracepoints if the bpf program calls one of the
>>>>>>>> printk bpf helpers.
>>>>>>>> 
>>>>>>>> Adding also a verifier check for tp_btf programs, so this can be caught
>>>>>>>> at program load time with an error message like:
>>>>>>>> 
>>>>>>>>   Can't attach program with bpf_trace_printk#6 helper to contention_begin tracepoint.
>>>>>>>> 
>>>>>>>> [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@xxxxxxxxxxxxxx/
>>>>>>>> [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@xxxxxxxxxxxxxx/
>>>>>>>> [3] https://lore.kernel.org/bpf/Y2j6ivTwFmA0FtvY@krava/
>>>>>>>> 
>>>>>>>> Reported-by: Hao Sun <sunhao.th@xxxxxxxxx>
>>>>>>>> Suggested-by: Alexei Starovoitov <ast@xxxxxxxxxx>
>>>>>>>> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
>>>>>>>> ---
>>>>>>>>  include/linux/bpf.h          |  1 +
>>>>>>>>  include/linux/bpf_verifier.h |  2 ++
>>>>>>>>  kernel/bpf/syscall.c         |  3 +++
>>>>>>>>  kernel/bpf/verifier.c        | 46 ++++++++++++++++++++++++++++++++++++
>>>>>>>>  4 files changed, 52 insertions(+)
>>>>>>>> 
>>>>>>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>>>>>>> index c9eafa67f2a2..3ccabede0f50 100644
>>>>>>>> --- a/include/linux/bpf.h
>>>>>>>> +++ b/include/linux/bpf.h
>>>>>>>> @@ -1319,6 +1319,7 @@ struct bpf_prog {
>>>>>>>>                            enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
>>>>>>>>                            call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
>>>>>>>>                            call_get_func_ip:1, /* Do we call get_func_ip() */
>>>>>>>> +                           call_printk:1, /* Do we call trace_printk/trace_vprintk  */
>>>>>>>>                            tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
>>>>>>>>    enum bpf_prog_type      type;           /* Type of BPF program */
>>>>>>>>    enum bpf_attach_type    expected_attach_type; /* For some prog types */
>>>>>>>> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
>>>>>>>> index 545152ac136c..7118c2fda59d 100644
>>>>>>>> --- a/include/linux/bpf_verifier.h
>>>>>>>> +++ b/include/linux/bpf_verifier.h
>>>>>>>> @@ -618,6 +618,8 @@ bool is_dynptr_type_expected(struct bpf_verifier_env *env,
>>>>>>>>                         struct bpf_reg_state *reg,
>>>>>>>>                         enum bpf_arg_type arg_type);
>>>>>>>> +int bpf_check_tp_printk_denylist(const char *name, struct bpf_prog *prog);
>>>>>>>> +
>>>>>>>>  /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
>>>>>>>>  static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
>>>>>>>>                                         struct btf *btf, u32 btf_id)
>>>>>>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>>>>>>> index 35972afb6850..9a69bda7d62b 100644
>>>>>>>> --- a/kernel/bpf/syscall.c
>>>>>>>> +++ b/kernel/bpf/syscall.c
>>>>>>>> @@ -3329,6 +3329,9 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
>>>>>>>>            return -EINVAL;
>>>>>>>>    }
>>>>>>>> +   if (bpf_check_tp_printk_denylist(tp_name, prog))
>>>>>>>> +           return -EACCES;
>>>>>>>> +
>>>>>>>>    btp = bpf_get_raw_tracepoint(tp_name);
>>>>>>>>    if (!btp)
>>>>>>>>            return -ENOENT;
>>>>>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>>>>>>> index f07bec227fef..b662bc851e1c 100644
>>>>>>>> --- a/kernel/bpf/verifier.c
>>>>>>>> +++ b/kernel/bpf/verifier.c
>>>>>>>> @@ -7472,6 +7472,47 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
>>>>>>>>                             state->callback_subprogno == subprogno);
>>>>>>>>  }
>>>>>>>> +int bpf_check_tp_printk_denylist(const char *name, struct bpf_prog *prog)
>>>>>>>> +{
>>>>>>>> +   static const char * const denylist[] = {
>>>>>>>> +           "contention_begin",
>>>>>>>> +           "bpf_trace_printk",
>>>>>>>> +   };
>>>>>>>> +   int i;
>>>>>>>> +
>>>>>>>> +   /* Do not allow attachment to denylist[] tracepoints,
>>>>>>>> +    * if the program calls some of the printk helpers,
>>>>>>>> +    * because there's possibility of deadlock.
>>>>>>>> +    */
>>>>>>> 
>>>>>>> What if that prog doesn't but tail calls into another one which calls printk helpers?
>>>>>> 
>>>>>> right, I'll deny that for all BPF_PROG_TYPE_RAW_TRACEPOINT* programs,
>>>>>> because I don't see easy way to check on that
>>>>>> 
>>>>>> we can leave the printk check for tracing BPF_TRACE_RAW_TP programs,
>>>>>> because the verifier knows the exact tracepoint already
>>>>> 
>>>>> This is all fragile and merely a stop gap.
>>>>> Doesn't sound that the issue is limited to bpf_trace_printk
>>>> 
>>>> hm, I don't have a better idea how to fix that.. I can't deny
>>>> contention_begin completely, because we use it in perf via
>>>> tp_btf/contention_begin (perf lock contention) and I don't
>>>> think there's another way for perf to do that
>>>> 
>>>> fwiw the last version below denies BPF_PROG_TYPE_RAW_TRACEPOINT
>>>> programs completely and tracing BPF_TRACE_RAW_TP with printks
>>>> 
>>> 
>>> I think disabling bpf_trace_printk() tracepoint for any BPF program is
>>> totally fine. This tracepoint was never intended to be attached to.
>>> 
>>> But as for the general bpf_trace_printk() deadlocking. Should we
>>> discuss how to make it not deadlock instead of starting to denylist
>>> things left and right?
>>> 
>>> Do I understand that we take trace_printk_lock only to protect that
>>> static char buf[]? Can we just make this buf per-CPU and do a trylock
>>> instead? We'll only fail to bpf_trace_printk() something if we have
>>> nested BPF programs (rare) or NMI (also rare).
>> 
>> ugh, sorry I overlooked your reply :-\
>> 
>> sounds good.. if it'd be acceptable to use trylock, we'd get rid of the
>> contention_begin tracepoint being triggered, which was the case for deadlock
> 
> looks like we can remove the spinlock completely by using the
> nested level buffer approach same way as in bpf_bprintf_prepare
> 
> it gets rid of the contention_begin tracepoint, so I'm not
> able to trigger the issue in my test
> 
> jirka
> 
> 
> ---
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c810c..d6afde7311f8 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -369,33 +369,60 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
> return &bpf_probe_write_user_proto;
> }
> 
> -static DEFINE_RAW_SPINLOCK(trace_printk_lock);
> -
> #define MAX_TRACE_PRINTK_VARARGS 3
> #define BPF_TRACE_PRINTK_SIZE 1024
> +#define BPF_TRACE_PRINTK_NEST 3
> +
> +struct trace_printk_buf {
> + char data[BPF_TRACE_PRINTK_NEST][BPF_TRACE_PRINTK_SIZE];
> + int nest;
> +};
> +static DEFINE_PER_CPU(struct trace_printk_buf, printk_buf);
> +
> +static void put_printk_buf(struct trace_printk_buf __percpu *buf)
> +{
> + this_cpu_dec(buf->nest);
> + preempt_enable();
> +}
> +
> +static bool get_printk_buf(struct trace_printk_buf __percpu *buf, char **data)
> +{
> + int nest;
> +
> + preempt_disable();
> + nest = this_cpu_inc_return(buf->nest);
> + if (nest > BPF_TRACE_PRINTK_NEST) {
> + put_printk_buf(buf);
> + return false;
> + }
> + *data = (char *) this_cpu_ptr(&buf->data[nest - 1]);
> + return true;
> +}
> 
> BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
>    u64, arg2, u64, arg3)
> {
> u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
> u32 *bin_args;
> - static char buf[BPF_TRACE_PRINTK_SIZE];
> - unsigned long flags;
> + char *buf;
> int ret;
> 
> + if (!get_printk_buf(&printk_buf, &buf))
> + return -EBUSY;
> +
> ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
>   MAX_TRACE_PRINTK_VARARGS);
> if (ret < 0)
> - return ret;
> + goto out;
> 
> - raw_spin_lock_irqsave(&trace_printk_lock, flags);
> - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> + ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
> 
> trace_bpf_trace_printk(buf);
> - raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> 
> bpf_bprintf_cleanup();
> 
> +out:
> + put_printk_buf(&printk_buf);
> return ret;
> }
> 
> @@ -427,31 +454,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> return &bpf_trace_printk_proto;
> }
> 
> +static DEFINE_PER_CPU(struct trace_printk_buf, vprintk_buf);
> +
> BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
>    u32, data_len)
> {
> - static char buf[BPF_TRACE_PRINTK_SIZE];
> - unsigned long flags;
> int ret, num_args;
> u32 *bin_args;
> + char *buf;
> 
> if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
>     (data_len && !data))
> return -EINVAL;
> num_args = data_len / 8;
> 
> + if (!get_printk_buf(&vprintk_buf, &buf))
> + return -EBUSY;
> +
> ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
> if (ret < 0)
> - return ret;
> + goto out;
> 
> - raw_spin_lock_irqsave(&trace_printk_lock, flags);
> - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> + ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
> 
> trace_bpf_trace_printk(buf);
> - raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> 
> bpf_bprintf_cleanup();
> 
> +out:
> + put_printk_buf(&vprintk_buf);
> return ret;
> }

Tested on a latest bpf-next build, I can confirm the patch also fixes
this[1] issue.

[1] https://lore.kernel.org/bpf/CACkBjsb3GRw5aiTT=RCUs3H5aum_QN+B0ZqZA=MvjspUP6NFMg@xxxxxxxxxxxxxx/T/#u



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux