Two helpers bpf_seq_printf and bpf_seq_write, are added for writing data to the seq_file buffer. bpf_seq_printf supports common format string flag/width/type fields so at least I can get identical results for netlink and ipv6_route targets. For bpf_seq_printf, return value 1 specifically indicates a write failure due to overflow in order to differentiate the failure from format strings. For seq_file show, since the same object may be called twice, some bpf_prog might be sensitive to this. With return value indicating overflow happens the bpf program can react differently. Signed-off-by: Yonghong Song <yhs@xxxxxx> --- include/uapi/linux/bpf.h | 18 +++- kernel/trace/bpf_trace.c | 172 +++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 + tools/include/uapi/linux/bpf.h | 18 +++- 4 files changed, 208 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b51d56fc77f9..a245f0df53c4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3030,6 +3030,20 @@ union bpf_attr { * * **-EOPNOTSUPP** Unsupported operation, for example a * call from outside of TC ingress. * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). + * + * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, ...) + * Description + * seq_printf + * Return + * 0 if successful, or + * 1 if failure due to buffer overflow, or + * a negative value for format string related failures. + * + * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * Description + * seq_write + * Return + * 0 if successful, non-zero otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3156,7 +3170,9 @@ union bpf_attr { FN(xdp_output), \ FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), + FN(sk_assign), \ + FN(seq_printf), \ + FN(seq_write), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ca1796747a77..e7d6ba7c9c51 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -457,6 +457,174 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } +BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, u64, arg1, + u64, arg2) +{ + bool buf_used = false; + int i, copy_size; + int mod[2] = {}; + int fmt_cnt = 0; + u64 unsafe_addr; + char buf[64]; + + /* + * bpf_check()->check_func_arg()->check_stack_boundary() + * guarantees that fmt points to bpf program stack, + * fmt_size bytes of it were initialized and fmt_size > 0 + */ + if (fmt[--fmt_size] != 0) + return -EINVAL; + + /* check format string for allowed specifiers */ + for (i = 0; i < fmt_size; i++) { + if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) + return -EINVAL; + + if (fmt[i] != '%') + continue; + + if (fmt_cnt >= 2) + return -EINVAL; + + /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ + i++; + + /* skip optional "[0+-][num]" width formating field */ + while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-') + i++; + if (fmt[i] >= '1' && fmt[i] <= '9') { + i++; + while (fmt[i] >= '0' && fmt[i] <= '9') + i++; + } + + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } else if (fmt[i] == 's') { + mod[fmt_cnt]++; + fmt_cnt++; + /* disallow any further format extensions */ + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) + return -EINVAL; + + if (buf_used) + /* allow only one '%s'/'%p' per fmt string */ + return -EINVAL; + buf_used = true; + + if (fmt_cnt == 1) { + unsafe_addr = arg1; + arg1 = (long) buf; + } else { + unsafe_addr = arg2; + arg2 = (long) buf; + } + buf[0] = 0; + strncpy_from_unsafe(buf, + (void *) (long) unsafe_addr, + sizeof(buf)); + continue; + } else if (fmt[i] == 'p') { + mod[fmt_cnt]++; + fmt_cnt++; + if (fmt[i + 1] == 0 || + fmt[i + 1] == 'K' || + fmt[i + 1] == 'x') { + /* just kernel pointers */ + continue; + } + + if (buf_used) + return -EINVAL; + buf_used = true; + + /* only support "%pI4", "%pi4", "%pI6" and "pi6". */ + if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') + return -EINVAL; + if (fmt[i + 2] != '4' && fmt[i + 2] != '6') + return -EINVAL; + + copy_size = (fmt[i + 2] == '4') ? 4 : 16; + + if (fmt_cnt == 1) { + unsafe_addr = arg1; + arg1 = (long) buf; + } else { + unsafe_addr = arg2; + arg2 = (long) buf; + } + probe_kernel_read(buf, (void *) (long) unsafe_addr, copy_size); + + i += 2; + continue; + } + + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } + + if (fmt[i] != 'i' && fmt[i] != 'd' && + fmt[i] != 'u' && fmt[i] != 'x') + return -EINVAL; + fmt_cnt++; + } + +/* Horrid workaround for getting va_list handling working with different + * argument type combinations generically for 32 and 64 bit archs. + */ +#define __BPF_SP_EMIT() __BPF_ARG2_SP() +#define __BPF_SP(...) \ + seq_printf(m, fmt, ##__VA_ARGS__) + +#define __BPF_ARG1_SP(...) \ + ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_SP(arg1, ##__VA_ARGS__) \ + : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_SP((long)arg1, ##__VA_ARGS__) \ + : __BPF_SP((u32)arg1, ##__VA_ARGS__))) + +#define __BPF_ARG2_SP(...) \ + ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_ARG1_SP(arg2, ##__VA_ARGS__) \ + : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_ARG1_SP((long)arg2, ##__VA_ARGS__) \ + : __BPF_ARG1_SP((u32)arg2, ##__VA_ARGS__))) + + __BPF_SP_EMIT(); + return seq_has_overflowed(m); +} + +static int bpf_seq_printf_btf_ids[5]; +static const struct bpf_func_proto bpf_seq_printf_proto = { + .func = bpf_seq_printf, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .btf_id = bpf_seq_printf_btf_ids, +}; + +BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const char *, data, u32, len) +{ + return seq_write(m, data, len); +} + +static int bpf_seq_write_btf_ids[5]; +static const struct bpf_func_proto bpf_seq_write_proto = { + .func = bpf_seq_write, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .btf_id = bpf_seq_write_btf_ids, +}; + static __always_inline int get_map_perf_counter(struct bpf_map *map, u64 flags, u64 *value, u64 *enabled, u64 *running) @@ -1224,6 +1392,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_xdp_output: return &bpf_xdp_output_proto; #endif + case BPF_FUNC_seq_printf: + return &bpf_seq_printf_proto; + case BPF_FUNC_seq_write: + return &bpf_seq_write_proto; default: return raw_tp_prog_func_proto(func_id, prog); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index f43d193aff3a..ded304c96a05 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -414,6 +414,7 @@ class PrinterHelpers(Printer): 'struct sk_reuseport_md', 'struct sockaddr', 'struct tcphdr', + 'struct seq_file', 'struct __sk_buff', 'struct sk_msg_md', @@ -450,6 +451,7 @@ class PrinterHelpers(Printer): 'struct sk_reuseport_md', 'struct sockaddr', 'struct tcphdr', + 'struct seq_file', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b51d56fc77f9..a245f0df53c4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3030,6 +3030,20 @@ union bpf_attr { * * **-EOPNOTSUPP** Unsupported operation, for example a * call from outside of TC ingress. * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). + * + * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, ...) + * Description + * seq_printf + * Return + * 0 if successful, or + * 1 if failure due to buffer overflow, or + * a negative value for format string related failures. + * + * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * Description + * seq_write + * Return + * 0 if successful, non-zero otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3156,7 +3170,9 @@ union bpf_attr { FN(xdp_output), \ FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), + FN(sk_assign), \ + FN(seq_printf), \ + FN(seq_write), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- 2.24.1