> On 7 Dec 2022, at 9:39 PM, Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
>
> On Sun, Dec 04, 2022 at 10:44:52PM +0100, Jiri Olsa wrote:
>> On Wed, Nov 30, 2022 at 03:29:39PM -0800, Andrii Nakryiko wrote:
>>> On Fri, Nov 25, 2022 at 1:35 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
>>>>
>>>> On Thu, Nov 24, 2022 at 09:17:22AM -0800, Alexei Starovoitov wrote:
>>>>> On Thu, Nov 24, 2022 at 1:42 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote:
>>>>>>
>>>>>> On Thu, Nov 24, 2022 at 01:41:23AM +0100, Daniel Borkmann wrote:
>>>>>>> On 11/21/22 10:31 PM, Jiri Olsa wrote:
>>>>>>>> We hit the following issues [1] [2] when we attach a bpf program that
>>>>>>>> calls the bpf_trace_printk helper to the contention_begin tracepoint.
>>>>>>>>
>>>>>>>> As described in [3], multiple bpf programs that call the bpf_trace_printk
>>>>>>>> helper attached to contention_begin might exhaust the printk buffer
>>>>>>>> or cause a deadlock [2].
>>>>>>>>
>>>>>>>> There's also another possible deadlock when multiple bpf programs attach
>>>>>>>> to the bpf_trace_printk tracepoint and call one of the printk bpf helpers.
>>>>>>>>
>>>>>>>> This change denies the attachment of a bpf program to the contention_begin
>>>>>>>> and bpf_trace_printk tracepoints if the bpf program calls one of the
>>>>>>>> printk bpf helpers.
>>>>>>>>
>>>>>>>> Also adding a verifier check for tp_btf programs, so this can be caught
>>>>>>>> at program load time with an error message like:
>>>>>>>>
>>>>>>>>   Can't attach program with bpf_trace_printk#6 helper to contention_begin tracepoint.
>>>>>>>>
>>>>>>>> [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@xxxxxxxxxxxxxx/
>>>>>>>> [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@xxxxxxxxxxxxxx/
>>>>>>>> [3] https://lore.kernel.org/bpf/Y2j6ivTwFmA0FtvY@krava/
>>>>>>>>
>>>>>>>> Reported-by: Hao Sun <sunhao.th@xxxxxxxxx>
>>>>>>>> Suggested-by: Alexei Starovoitov <ast@xxxxxxxxxx>
>>>>>>>> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
>>>>>>>> ---
>>>>>>>>  include/linux/bpf.h          |  1 +
>>>>>>>>  include/linux/bpf_verifier.h |  2 ++
>>>>>>>>  kernel/bpf/syscall.c         |  3 +++
>>>>>>>>  kernel/bpf/verifier.c        | 46 ++++++++++++++++++++++++++++++++++++
>>>>>>>>  4 files changed, 52 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>>>>>>> index c9eafa67f2a2..3ccabede0f50 100644
>>>>>>>> --- a/include/linux/bpf.h
>>>>>>>> +++ b/include/linux/bpf.h
>>>>>>>> @@ -1319,6 +1319,7 @@ struct bpf_prog {
>>>>>>>>  				enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
>>>>>>>>  				call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
>>>>>>>>  				call_get_func_ip:1, /* Do we call get_func_ip() */
>>>>>>>> +				call_printk:1, /* Do we call trace_printk/trace_vprintk */
>>>>>>>>  				tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
>>>>>>>>  	enum bpf_prog_type type; /* Type of BPF program */
>>>>>>>>  	enum bpf_attach_type expected_attach_type; /* For some prog types */
>>>>>>>> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
>>>>>>>> index 545152ac136c..7118c2fda59d 100644
>>>>>>>> --- a/include/linux/bpf_verifier.h
>>>>>>>> +++ b/include/linux/bpf_verifier.h
>>>>>>>> @@ -618,6 +618,8 @@ bool is_dynptr_type_expected(struct bpf_verifier_env *env,
>>>>>>>>  			     struct bpf_reg_state *reg,
>>>>>>>>  			     enum bpf_arg_type arg_type);
>>>>>>>>
>>>>>>>> +int bpf_check_tp_printk_denylist(const char *name, struct bpf_prog *prog);
>>>>>>>> +
>>>>>>>>  /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
>>>>>>>>  static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
>>>>>>>>  					     struct btf *btf, u32 btf_id)
>>>>>>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>>>>>>> index 35972afb6850..9a69bda7d62b 100644
>>>>>>>> --- a/kernel/bpf/syscall.c
>>>>>>>> +++ b/kernel/bpf/syscall.c
>>>>>>>> @@ -3329,6 +3329,9 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
>>>>>>>>  		return -EINVAL;
>>>>>>>>  	}
>>>>>>>>
>>>>>>>> +	if (bpf_check_tp_printk_denylist(tp_name, prog))
>>>>>>>> +		return -EACCES;
>>>>>>>> +
>>>>>>>>  	btp = bpf_get_raw_tracepoint(tp_name);
>>>>>>>>  	if (!btp)
>>>>>>>>  		return -ENOENT;
>>>>>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>>>>>>> index f07bec227fef..b662bc851e1c 100644
>>>>>>>> --- a/kernel/bpf/verifier.c
>>>>>>>> +++ b/kernel/bpf/verifier.c
>>>>>>>> @@ -7472,6 +7472,47 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
>>>>>>>>  		state->callback_subprogno == subprogno);
>>>>>>>>  }
>>>>>>>>
>>>>>>>> +int bpf_check_tp_printk_denylist(const char *name, struct bpf_prog *prog)
>>>>>>>> +{
>>>>>>>> +	static const char * const denylist[] = {
>>>>>>>> +		"contention_begin",
>>>>>>>> +		"bpf_trace_printk",
>>>>>>>> +	};
>>>>>>>> +	int i;
>>>>>>>> +
>>>>>>>> +	/* Do not allow attachment to denylist[] tracepoints,
>>>>>>>> +	 * if the program calls some of the printk helpers,
>>>>>>>> +	 * because there's possibility of deadlock.
>>>>>>>> +	 */
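(The quoted verifier.c hunk is trimmed right after that comment. For anyone
reading along: presumably the function goes on to match the tracepoint name
against denylist[] and to honour the call_printk bit added to struct bpf_prog
above. A rough sketch of that remainder, illustrative only and not the actual
posted patch:

	if (!prog->call_printk)
		return 0;

	for (i = 0; i < ARRAY_SIZE(denylist); i++) {
		/* a nonzero return is turned into -EACCES by
		 * bpf_raw_tp_link_attach() above
		 */
		if (!strcmp(name, denylist[i]))
			return 1;
	}
	return 0;
}
)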
>>>>>>>
>>>>>>> What if that prog doesn't call printk helpers itself but tail calls
>>>>>>> into another one which does?
>>>>>>
>>>>>> right, I'll deny that for all BPF_PROG_TYPE_RAW_TRACEPOINT* programs,
>>>>>> because I don't see an easy way to check for that
>>>>>>
>>>>>> we can leave the printk check for tracing BPF_TRACE_RAW_TP programs,
>>>>>> because the verifier knows the exact tracepoint already
>>>>>
>>>>> This is all fragile and merely a stop gap.
>>>>> It doesn't sound like the issue is limited to bpf_trace_printk.
>>>>
>>>> hm, I don't have a better idea how to fix that.. I can't deny
>>>> contention_begin completely, because we use it in perf via
>>>> tp_btf/contention_begin (perf lock contention) and I don't
>>>> think there's another way for perf to do that
>>>>
>>>> fwiw the last version below denies BPF_PROG_TYPE_RAW_TRACEPOINT
>>>> programs completely and tracing BPF_TRACE_RAW_TP programs with printks
>>>>
>>> I think disabling the bpf_trace_printk() tracepoint for any BPF program is
>>> totally fine. This tracepoint was never intended to be attached to.
>>>
>>> But as for the general bpf_trace_printk() deadlocking: should we
>>> discuss how to make it not deadlock instead of starting to denylist
>>> things left and right?
>>>
>>> Do I understand correctly that we take trace_printk_lock only to protect
>>> that static char buf[]? Can we just make this buf per-CPU and do a trylock
>>> instead? We'll only fail to bpf_trace_printk() something if we have
>>> nested BPF programs (rare) or NMI (also rare).
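(To make that concrete: in the context of kernel/trace/bpf_trace.c the
per-CPU buffer plus trylock variant could look roughly like the sketch below.
My sketch, untested and not from any posted patch; trace_printk_emit is a
made-up name for the common tail of the two helpers:

/* Per-CPU buffer + trylock idea, illustrative only: a nested BPF
 * program or an NMI on the same CPU fails the trylock and loses its
 * message with -EBUSY instead of deadlocking.
 */
static DEFINE_RAW_SPINLOCK(trace_printk_lock);
static DEFINE_PER_CPU(char [BPF_TRACE_PRINTK_SIZE], trace_printk_cpu_buf);

static int trace_printk_emit(const char *fmt, u32 *bin_args)
{
	unsigned long flags;
	char *buf;
	int ret;

	if (!raw_spin_trylock_irqsave(&trace_printk_lock, flags))
		return -EBUSY;

	buf = (char *)this_cpu_ptr(&trace_printk_cpu_buf);
	ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
	trace_bpf_trace_printk(buf);

	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
	return ret;
}
)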
>>
>> ugh, sorry I overlooked your reply :-\
>>
>> sounds good.. if it'd be acceptable to use a trylock, we'd get rid of the
>> contention_begin tracepoint being triggered, which is what caused the deadlock
>
> looks like we can remove the spinlock completely by using the same
> nesting-level buffer approach as in bpf_bprintf_prepare
>
> it gets rid of the contention_begin tracepoint entirely, so I'm no longer
> able to trigger the issue in my test
>
> jirka
>
>
> ---
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c810c..d6afde7311f8 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -369,33 +369,60 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
>  	return &bpf_probe_write_user_proto;
>  }
>
> -static DEFINE_RAW_SPINLOCK(trace_printk_lock);
> -
>  #define MAX_TRACE_PRINTK_VARARGS	3
>  #define BPF_TRACE_PRINTK_SIZE		1024
> +#define BPF_TRACE_PRINTK_NEST		3
> +
> +struct trace_printk_buf {
> +	char data[BPF_TRACE_PRINTK_NEST][BPF_TRACE_PRINTK_SIZE];
> +	int nest;
> +};
> +static DEFINE_PER_CPU(struct trace_printk_buf, printk_buf);
> +
> +static void put_printk_buf(struct trace_printk_buf __percpu *buf)
> +{
> +	this_cpu_dec(buf->nest);
> +	preempt_enable();
> +}
> +
> +static bool get_printk_buf(struct trace_printk_buf __percpu *buf, char **data)
> +{
> +	int nest;
> +
> +	preempt_disable();
> +	nest = this_cpu_inc_return(buf->nest);
> +	if (nest > BPF_TRACE_PRINTK_NEST) {
> +		put_printk_buf(buf);
> +		return false;
> +	}
> +	*data = (char *) this_cpu_ptr(&buf->data[nest - 1]);
> +	return true;
> +}
>
>  BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
>  	   u64, arg2, u64, arg3)
>  {
>  	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
>  	u32 *bin_args;
> -	static char buf[BPF_TRACE_PRINTK_SIZE];
> -	unsigned long flags;
> +	char *buf;
>  	int ret;
>
> +	if (!get_printk_buf(&printk_buf, &buf))
> +		return -EBUSY;
> +
>  	ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
>  				  MAX_TRACE_PRINTK_VARARGS);
>  	if (ret < 0)
> -		return ret;
> +		goto out;
>
> -	raw_spin_lock_irqsave(&trace_printk_lock, flags);
> -	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> +	ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
>
>  	trace_bpf_trace_printk(buf);
> -	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
>
>  	bpf_bprintf_cleanup();
>
> +out:
> +	put_printk_buf(&printk_buf);
>  	return ret;
>  }
>
> @@ -427,31 +454,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>  	return &bpf_trace_printk_proto;
>  }
>
> +static DEFINE_PER_CPU(struct trace_printk_buf, vprintk_buf);
> +
>  BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
>  	   u32, data_len)
>  {
> -	static char buf[BPF_TRACE_PRINTK_SIZE];
> -	unsigned long flags;
>  	int ret, num_args;
>  	u32 *bin_args;
> +	char *buf;
>
>  	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
>  	    (data_len && !data))
>  		return -EINVAL;
>  	num_args = data_len / 8;
>
> +	if (!get_printk_buf(&vprintk_buf, &buf))
> +		return -EBUSY;
> +
>  	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
>  	if (ret < 0)
> -		return ret;
> +		goto out;
>
> -	raw_spin_lock_irqsave(&trace_printk_lock, flags);
> -	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> +	ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
>
>  	trace_bpf_trace_printk(buf);
> -	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
>
>  	bpf_bprintf_cleanup();
>
> +out:
> +	put_printk_buf(&vprintk_buf);
>  	return ret;
>  }
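A note on what the nest counter buys: it also covers the second scenario from
the original patch description, a printk-calling program attached to the
bpf_trace_printk tracepoint itself. A sketch of that shape (illustrative, not
a program from this thread):

/* Each bpf_printk() below fires trace_bpf_trace_printk() again on the
 * same CPU; the helper's per-CPU nest counter caps such re-entry and
 * returns -EBUSY past BPF_TRACE_PRINTK_NEST levels instead of looping
 * or deadlocking.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("tp_btf/bpf_trace_printk")
int BPF_PROG(on_printk, const char *bpf_string)
{
	bpf_printk("nested printk event");
	return 0;
}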
Tested on the latest bpf-next build, I can confirm the patch also fixes this[1] issue.

[1] https://lore.kernel.org/bpf/CACkBjsb3GRw5aiTT=RCUs3H5aum_QN+B0ZqZA=MvjspUP6NFMg@xxxxxxxxxxxxxx/T/#u
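For reference, the problematic pattern from the reports looks roughly like
this (a sketch in the spirit of [1] and the commit message above, not the
exact reproducer): every contended lock prints, and per the reports, before
this patch contention on trace_printk_lock inside the helper could fire
contention_begin again and deadlock.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("tp_btf/contention_begin")
int BPF_PROG(on_contention, void *lock, unsigned int flags)
{
	/* Printing from here used to take trace_printk_lock, whose own
	 * contention re-fired this tracepoint; the patched helper takes
	 * no lock at all.
	 */
	bpf_printk("contention on lock %p", lock);
	return 0;
}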