Re: [PATCH v2 bpf-next 01/16] bpf: Introduce bpf_sys_bpf() helper and program type.

Yonghong Song <yhs@xxxxxx> · Fri, 23 Apr 2021 11:15:48 -0700

On 4/22/21 5:26 PM, Alexei Starovoitov wrote:
From: Alexei Starovoitov <ast@xxxxxxxxxx>

Add placeholders for bpf_sys_bpf() helper and new program type.

v1->v2:
- check that expected_attach_type is zero
- allow more helper functions to be used in this program type, since they will
   only execute from user context via bpf_prog_test_run.

Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
---
  include/linux/bpf.h            | 10 +++++++
  include/linux/bpf_types.h      |  2 ++
  include/uapi/linux/bpf.h       |  8 +++++
  kernel/bpf/syscall.c           | 54 ++++++++++++++++++++++++++++++++++
  net/bpf/test_run.c             | 43 +++++++++++++++++++++++++++
  tools/include/uapi/linux/bpf.h |  8 +++++
  6 files changed, 125 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f8a45f109e96..aed30bbffb54 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1824,6 +1824,9 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
  
  struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
  void bpf_map_offload_map_free(struct bpf_map *map);
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+			      const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr);
  #else
  static inline int bpf_prog_offload_init(struct bpf_prog *prog,
  					union bpf_attr *attr)
@@ -1849,6 +1852,13 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
  static inline void bpf_map_offload_map_free(struct bpf_map *map)
  {
  }
+
+static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+					    const union bpf_attr *kattr,
+					    union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
  #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
  
  #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index f883f01a5061..a9db1eae6796 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -77,6 +77,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
  	       void *, void *)
  #endif /* CONFIG_BPF_LSM */
  #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
+	      void *, void *)
  
  BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
  BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ec6d85a81744..c92648f38144 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -937,6 +937,7 @@ enum bpf_prog_type {
  	BPF_PROG_TYPE_EXT,
  	BPF_PROG_TYPE_LSM,
  	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
  };
  
  enum bpf_attach_type {
@@ -4735,6 +4736,12 @@ union bpf_attr {
   *		be zero-terminated except when **str_size** is 0.
   *
   *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * 	Description
+ * 		Execute bpf syscall with given arguments.
+ * 	Return
+ * 		A syscall result.
   */
  #define __BPF_FUNC_MAPPER(FN)		\
  	FN(unspec),			\
@@ -4903,6 +4910,7 @@ union bpf_attr {
  	FN(check_mtu),			\
  	FN(for_each_map_elem),		\
  	FN(snprintf),			\
+	FN(sys_bpf),			\
  	/* */
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd495190115e..8636876f3e6b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2014,6 +2014,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
  		if (expected_attach_type == BPF_SK_LOOKUP)
  			return 0;
  		return -EINVAL;
+	case BPF_PROG_TYPE_SYSCALL:
  	case BPF_PROG_TYPE_EXT:
  		if (expected_attach_type)
  			return -EINVAL;
@@ -4497,3 +4498,56 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
  
  	return err;
  }
+
+static bool syscall_prog_is_valid_access(int off, int size,
+					 enum bpf_access_type type,
+					 const struct bpf_prog *prog,
+					 struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= U16_MAX)
+		return false;

Is this enough? If I understand correctly, the new program type
allows any arbitrary context data from user as long as its size
meets the following constraints:
   if (ctx_size_in < prog->aux->max_ctx_offset ||
 	    ctx_size_in > U16_MAX)
		return -EINVAL;

So if user provides a ctx with size say 40 and inside the program looks
it is still able to read/write to say offset 400.
Should we be a little more restrictive on this?

+	if (off % size != 0)
+		return false;
+	return true;
+}
+
+BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
+{
+	return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_sys_bpf_proto = {
+	.func		= bpf_sys_bpf,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+const struct bpf_func_proto * __weak
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
+syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_sys_bpf:
+		return &bpf_sys_bpf_proto;
+	default:
+		return tracing_prog_func_proto(func_id, prog);
+	}
+}
+
+const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
+	.get_func_proto  = syscall_prog_func_proto,
+	.is_valid_access = syscall_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops bpf_syscall_prog_ops = {
+	.test_run = bpf_prog_test_run_syscall,
+};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a5d72c48fb66..1783ea77b95c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -918,3 +918,46 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
  	kfree(user_ctx);
  	return ret;
  }
+
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+			      const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr)
+{
+	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+	__u32 ctx_size_in = kattr->test.ctx_size_in;
+	void *ctx = NULL;
+	u32 retval;
+	int err = 0;
+
+	/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
+	if (kattr->test.data_in || kattr->test.data_out ||
+	    kattr->test.ctx_out || kattr->test.duration ||
+	    kattr->test.repeat || kattr->test.flags)
+		return -EINVAL;
+
+	if (ctx_size_in < prog->aux->max_ctx_offset ||
+	    ctx_size_in > U16_MAX)
+		return -EINVAL;
+
+	if (ctx_size_in) {
+		ctx = kzalloc(ctx_size_in, GFP_USER);
+		if (!ctx)
+			return -ENOMEM;
+		if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
+	}
+	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+
+	if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32)))
+		err = -EFAULT;
+	if (ctx_size_in)
+		if (copy_to_user(ctx_in, ctx, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
+out:
+	kfree(ctx);
+	return err;
+}
[...]