On Wed, Dec 14, 2022 at 11:48:06AM -0800, Andrii Nakryiko wrote: SNIP > > > > - raw_spin_lock_irqsave(&trace_printk_lock, flags); > > - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); > > + ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args); > > > > trace_bpf_trace_printk(buf); > > - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); > > > > bpf_bprintf_cleanup(); > > I checked what this is doing. And it seems like we have a very similar > thing there already, with preempt_disable(), 3-level buffers, etc. > Would it make sense to roll this new change into bpf_bprintf_prepare() > and make it return the buffer for bpf_trace_printk(), even if it is > not used for bpf_snprintf() ? I thought adding another arg to bpf_bprintf_prepare would be too much, but it actually does not look that bad. I'll do some more testing and send another version. jirka --- diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3de24cfb7a3d..da5733f15994 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2795,9 +2795,10 @@ struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +#define MAX_PRINTF_BUF 1024 int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_buf, u32 num_args); + u32 **bin_buf, char **buf, u32 num_args); void bpf_bprintf_cleanup(void); /* the implementation of the opaque uapi struct bpf_dynptr */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index af30c6cbd65d..e2c1e573401b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -756,19 +756,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary * arguments representation. 
*/ -#define MAX_BPRINTF_BUF_LEN 512 +#define MAX_PRINTF_BIN_ARGS 512 /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { - char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; + char bin_args[MAX_PRINTF_BIN_ARGS]; + char buf[MAX_PRINTF_BUF]; }; -static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); + +static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); -static int try_get_fmt_tmp_buf(char **tmp_buf) +static int try_get_buffers(struct bpf_bprintf_buffers **bufs) { - struct bpf_bprintf_buffers *bufs; int nest_level; preempt_disable(); @@ -778,9 +779,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_bprintf_bufs); - *tmp_buf = bufs->tmp_bufs[nest_level - 1]; - + *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } @@ -807,27 +806,33 @@ void bpf_bprintf_cleanup(void) * allocated and bpf_bprintf_cleanup should be called to free them after use. 
*/ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_args, u32 num_args) + u32 **bin_args, char **buf, u32 num_args) { char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; + struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX"; + bool get_buffers = bin_args || buf; fmt_end = strnchr(fmt, fmt_size, 0); if (!fmt_end) return -EINVAL; fmt_size = fmt_end - fmt; - if (bin_args) { - if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) - return -EBUSY; + if (get_buffers && try_get_buffers(&buffers)) + return -EBUSY; - tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; + if (bin_args) { + tmp_buf = buffers->bin_args; + tmp_buf_end = tmp_buf + MAX_PRINTF_BIN_ARGS; *bin_args = (u32 *)tmp_buf; } + if (buf) + *buf = buffers->buf; + for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; @@ -1020,7 +1025,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, err = 0; out: - if (err) + if (err && get_buffers) bpf_bprintf_cleanup(); return err; } @@ -1039,7 +1044,7 @@ BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, NULL, num_args); if (err < 0) return err; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a5255a0dcbb6..798f65c532b1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7636,7 +7636,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we * can focus on validating the format specifiers. 
*/ - err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args); if (err < 0) verbose(env, "Invalid format string\n"); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3bbd3f0c810c..7a6a07b2180e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -369,8 +369,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static DEFINE_RAW_SPINLOCK(trace_printk_lock); - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -379,20 +377,17 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, { u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; u32 *bin_args; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + char *buf; int ret; - ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, + ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, &buf, MAX_TRACE_PRINTK_VARARGS); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(buf, MAX_PRINTF_BUF, fmt, bin_args); trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); bpf_bprintf_cleanup(); @@ -430,25 +425,22 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, u32, data_len) { - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; int ret, num_args; u32 *bin_args; + char *buf; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || (data_len && !data)) return -EINVAL; num_args = data_len / 8; - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, &buf, num_args); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, 
bin_args); + ret = bstr_printf(buf, MAX_PRINTF_BUF, fmt, bin_args); trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); bpf_bprintf_cleanup(); @@ -482,7 +474,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, return -EINVAL; num_args = data_len / 8; - err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, NULL, num_args); if (err < 0) return err;