On 4/22/21 5:26 PM, Alexei Starovoitov wrote:
From: Alexei Starovoitov <ast@xxxxxxxxxx>
Add placeholders for bpf_sys_bpf() helper and new program type.
v1->v2:
- check that expected_attach_type is zero
- allow more helper functions to be used in this program type, since they will
only execute from user context via bpf_prog_test_run.
Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
---
include/linux/bpf.h | 10 +++++++
include/linux/bpf_types.h | 2 ++
include/uapi/linux/bpf.h | 8 +++++
kernel/bpf/syscall.c | 54 ++++++++++++++++++++++++++++++++++
net/bpf/test_run.c | 43 +++++++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 8 +++++
6 files changed, 125 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f8a45f109e96..aed30bbffb54 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1824,6 +1824,9 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
@@ -1849,6 +1852,13 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
+
+static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index f883f01a5061..a9db1eae6796 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -77,6 +77,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
void *, void *)
#endif /* CONFIG_BPF_LSM */
#endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
+ void *, void *)
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ec6d85a81744..c92648f38144 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -937,6 +937,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
};
enum bpf_attach_type {
@@ -4735,6 +4736,12 @@ union bpf_attr {
* be zero-terminated except when **str_size** is 0.
*
* Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * Description
+ * Execute bpf syscall with given arguments.
+ * Return
+ * A syscall result.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4903,6 +4910,7 @@ union bpf_attr {
FN(check_mtu), \
FN(for_each_map_elem), \
FN(snprintf), \
+ FN(sys_bpf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd495190115e..8636876f3e6b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2014,6 +2014,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
if (expected_attach_type == BPF_SK_LOOKUP)
return 0;
return -EINVAL;
+ case BPF_PROG_TYPE_SYSCALL:
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
return -EINVAL;
@@ -4497,3 +4498,56 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
return err;
}
+
+static bool syscall_prog_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= U16_MAX)
+ return false;
Is this enough? If I understand correctly, the new program type
allows any arbitrary context data from user as long as its size
meets the following constraints:
if (ctx_size_in < prog->aux->max_ctx_offset ||
ctx_size_in > U16_MAX)
return -EINVAL;
So if user provides a ctx with size say 40 and inside the program looks
it is still able to read/write to say offset 400.
Should we be a little more restrictive on this?
+ if (off % size != 0)
+ return false;
+ return true;
+}
+
+BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
+{
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_sys_bpf_proto = {
+ .func = bpf_sys_bpf,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+const struct bpf_func_proto * __weak
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+
+ return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
+syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_sys_bpf:
+ return &bpf_sys_bpf_proto;
+ default:
+ return tracing_prog_func_proto(func_id, prog);
+ }
+}
+
+const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
+ .get_func_proto = syscall_prog_func_proto,
+ .is_valid_access = syscall_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops bpf_syscall_prog_ops = {
+ .test_run = bpf_prog_test_run_syscall,
+};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a5d72c48fb66..1783ea77b95c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -918,3 +918,46 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
kfree(user_ctx);
return ret;
}
+
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+ __u32 ctx_size_in = kattr->test.ctx_size_in;
+ void *ctx = NULL;
+ u32 retval;
+ int err = 0;
+
+ /* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
+ if (kattr->test.data_in || kattr->test.data_out ||
+ kattr->test.ctx_out || kattr->test.duration ||
+ kattr->test.repeat || kattr->test.flags)
+ return -EINVAL;
+
+ if (ctx_size_in < prog->aux->max_ctx_offset ||
+ ctx_size_in > U16_MAX)
+ return -EINVAL;
+
+ if (ctx_size_in) {
+ ctx = kzalloc(ctx_size_in, GFP_USER);
+ if (!ctx)
+ return -ENOMEM;
+ if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
+ err = -EFAULT;
+ goto out;
+ }
+ }
+ retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+
+ if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32)))
+ err = -EFAULT;
+ if (ctx_size_in)
+ if (copy_to_user(ctx_in, ctx, ctx_size_in)) {
+ err = -EFAULT;
+ goto out;
+ }
+out:
+ kfree(ctx);
+ return err;
+}
[...]