[RFC PATCH bpf-next 09/16] bpf: add bpf_seq_printf and bpf_seq_write helpers

Yonghong Song <yhs@xxxxxx> · Wed, 8 Apr 2020 16:25:31 -0700

Two helpers bpf_seq_printf and bpf_seq_write, are added for
writing data to the seq_file buffer.

bpf_seq_printf supports common format string flag/width/type
fields so at least I can get identical results for
netlink and ipv6_route targets.

For bpf_seq_printf, return value 1 specifically indicates
a write failure due to overflow in order to differentiate
the failure from format strings.

For seq_file show, since the same object may be called
twice, some bpf_prog might be sensitive to this. With return
value indicating overflow happens the bpf program can
react differently.

Signed-off-by: Yonghong Song <yhs@xxxxxx>
---
 include/uapi/linux/bpf.h       |  18 +++-
 kernel/trace/bpf_trace.c       | 172 +++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |   2 +
 tools/include/uapi/linux/bpf.h |  18 +++-
 4 files changed, 208 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b51d56fc77f9..a245f0df53c4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3030,6 +3030,20 @@ union bpf_attr {
  *		* **-EOPNOTSUPP**	Unsupported operation, for example a
  *					call from outside of TC ingress.
  *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, ...)
+ * 	Description
+ * 		seq_printf
+ * 	Return
+ * 		0 if successful, or
+ * 		1 if failure due to buffer overflow, or
+ * 		a negative value for format string related failures.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * 	Description
+ * 		seq_write
+ * 	Return
+ * 		0 if successful, non-zero otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3156,7 +3170,9 @@ union bpf_attr {
 	FN(xdp_output),			\
 	FN(get_netns_cookie),		\
 	FN(get_current_ancestor_cgroup_id),	\
-	FN(sk_assign),
+	FN(sk_assign),			\
+	FN(seq_printf),			\
+	FN(seq_write),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1796747a77..e7d6ba7c9c51 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -457,6 +457,174 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	return &bpf_trace_printk_proto;
 }
 
+BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, u64, arg1,
+	   u64, arg2)
+{
+	bool buf_used = false;
+	int i, copy_size;
+	int mod[2] = {};
+	int fmt_cnt = 0;
+	u64 unsafe_addr;
+	char buf[64];
+
+	/*
+	 * bpf_check()->check_func_arg()->check_stack_boundary()
+	 * guarantees that fmt points to bpf program stack,
+	 * fmt_size bytes of it were initialized and fmt_size > 0
+	 */
+	if (fmt[--fmt_size] != 0)
+		return -EINVAL;
+
+	/* check format string for allowed specifiers */
+	for (i = 0; i < fmt_size; i++) {
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
+			return -EINVAL;
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt_cnt >= 2)
+			return -EINVAL;
+
+		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+		i++;
+
+		/* skip optional "[0+-][num]" width formating field */
+		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-')
+			i++;
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		} else if (fmt[i] == 's') {
+			mod[fmt_cnt]++;
+			fmt_cnt++;
+			/* disallow any further format extensions */
+			if (fmt[i + 1] != 0 &&
+			    !isspace(fmt[i + 1]) &&
+			    !ispunct(fmt[i + 1]))
+				return -EINVAL;
+
+			if (buf_used)
+				/* allow only one '%s'/'%p' per fmt string */
+				return -EINVAL;
+			buf_used = true;
+
+			if (fmt_cnt == 1) {
+				unsafe_addr = arg1;
+				arg1 = (long) buf;
+			} else {
+				unsafe_addr = arg2;
+				arg2 = (long) buf;
+			}
+			buf[0] = 0;
+			strncpy_from_unsafe(buf,
+					    (void *) (long) unsafe_addr,
+					    sizeof(buf));
+			continue;
+		} else if (fmt[i] == 'p') {
+			mod[fmt_cnt]++;
+			fmt_cnt++;
+			if (fmt[i + 1] == 0 ||
+			    fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x') {
+				/* just kernel pointers */
+				continue;
+			}
+
+			if (buf_used)
+				return -EINVAL;
+			buf_used = true;
+
+			/* only support "%pI4", "%pi4", "%pI6" and "pi6". */
+			if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I')
+				return -EINVAL;
+			if (fmt[i + 2] != '4' && fmt[i + 2] != '6')
+				return -EINVAL;
+
+			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+
+			if (fmt_cnt == 1) {
+				unsafe_addr = arg1;
+				arg1 = (long) buf;
+			} else {
+				unsafe_addr = arg2;
+				arg2 = (long) buf;
+			}
+			probe_kernel_read(buf, (void *) (long) unsafe_addr, copy_size);
+
+			i += 2;
+			continue;
+		}
+
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		}
+
+		if (fmt[i] != 'i' && fmt[i] != 'd' &&
+		    fmt[i] != 'u' && fmt[i] != 'x')
+			return -EINVAL;
+		fmt_cnt++;
+	}
+
+/* Horrid workaround for getting va_list handling working with different
+ * argument type combinations generically for 32 and 64 bit archs.
+ */
+#define __BPF_SP_EMIT()	__BPF_ARG2_SP()
+#define __BPF_SP(...)							\
+	seq_printf(m, fmt, ##__VA_ARGS__)
+
+#define __BPF_ARG1_SP(...)						\
+	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
+	  ? __BPF_SP(arg1, ##__VA_ARGS__)				\
+	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
+	      ? __BPF_SP((long)arg1, ##__VA_ARGS__)			\
+	      : __BPF_SP((u32)arg1, ##__VA_ARGS__)))
+
+#define __BPF_ARG2_SP(...)						\
+	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
+	  ? __BPF_ARG1_SP(arg2, ##__VA_ARGS__)				\
+	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
+	      ? __BPF_ARG1_SP((long)arg2, ##__VA_ARGS__)		\
+	      : __BPF_ARG1_SP((u32)arg2, ##__VA_ARGS__)))
+
+	__BPF_SP_EMIT();
+	return seq_has_overflowed(m);
+}
+
+static int bpf_seq_printf_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_printf_proto = {
+	.func		= bpf_seq_printf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.btf_id		= bpf_seq_printf_btf_ids,
+};
+
+BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const char *, data, u32, len)
+{
+	return seq_write(m, data, len);
+}
+
+static int bpf_seq_write_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_write_proto = {
+	.func		= bpf_seq_write,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.btf_id		= bpf_seq_write_btf_ids,
+};
+
 static __always_inline int
 get_map_perf_counter(struct bpf_map *map, u64 flags,
 		     u64 *value, u64 *enabled, u64 *running)
@@ -1224,6 +1392,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_xdp_output:
 		return &bpf_xdp_output_proto;
 #endif
+	case BPF_FUNC_seq_printf:
+		return &bpf_seq_printf_proto;
+	case BPF_FUNC_seq_write:
+		return &bpf_seq_write_proto;
 	default:
 		return raw_tp_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index f43d193aff3a..ded304c96a05 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -414,6 +414,7 @@ class PrinterHelpers(Printer):
             'struct sk_reuseport_md',
             'struct sockaddr',
             'struct tcphdr',
+            'struct seq_file',
 
             'struct __sk_buff',
             'struct sk_msg_md',
@@ -450,6 +451,7 @@ class PrinterHelpers(Printer):
             'struct sk_reuseport_md',
             'struct sockaddr',
             'struct tcphdr',
+            'struct seq_file',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b51d56fc77f9..a245f0df53c4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3030,6 +3030,20 @@ union bpf_attr {
  *		* **-EOPNOTSUPP**	Unsupported operation, for example a
  *					call from outside of TC ingress.
  *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, ...)
+ * 	Description
+ * 		seq_printf
+ * 	Return
+ * 		0 if successful, or
+ * 		1 if failure due to buffer overflow, or
+ * 		a negative value for format string related failures.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * 	Description
+ * 		seq_write
+ * 	Return
+ * 		0 if successful, non-zero otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3156,7 +3170,9 @@ union bpf_attr {
 	FN(xdp_output),			\
 	FN(get_netns_cookie),		\
 	FN(get_current_ancestor_cgroup_id),	\
-	FN(sk_assign),
+	FN(sk_assign),			\
+	FN(seq_printf),			\
+	FN(seq_write),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
2.24.1