Let the user to register a BPF_PROG_TYPE_IOURING BPF program to a ring. The progrma will be run in the waiting loop every time something happens, i.e. the task was woken up by a task_work / signal / etc. Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- include/linux/io_uring_types.h | 4 +++ include/uapi/linux/io_uring.h | 9 +++++ io_uring/bpf.c | 63 ++++++++++++++++++++++++++++++++++ io_uring/bpf.h | 41 ++++++++++++++++++++++ io_uring/io_uring.c | 15 ++++++++ io_uring/register.c | 7 ++++ 6 files changed, 139 insertions(+) create mode 100644 io_uring/bpf.h diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index ad5001102c86..50cee0d3622e 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -8,6 +8,8 @@ #include <linux/llist.h> #include <uapi/linux/io_uring.h> +struct io_bpf_ctx; + enum { /* * A hint to not wake right away but delay until there are enough of @@ -246,6 +248,8 @@ struct io_ring_ctx { enum task_work_notify_mode notify_method; unsigned sq_thread_idle; + + struct io_bpf_ctx *bpf_ctx; } ____cacheline_aligned_in_smp; /* submission data */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ba373deb8406..f2c2fefc8514 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -634,6 +634,8 @@ enum io_uring_register_op { /* register fixed io_uring_reg_wait arguments */ IORING_REGISTER_CQWAIT_REG = 34, + IORING_REGISTER_BPF = 35, + /* this goes last */ IORING_REGISTER_LAST, @@ -905,6 +907,13 @@ enum io_uring_socket_op { SOCKET_URING_OP_SETSOCKOPT, }; +struct io_uring_bpf_reg { + __u64 prog_fd; + __u32 flags; + __u32 resv1; + __u64 resv2[2]; +}; + #ifdef __cplusplus } #endif diff --git a/io_uring/bpf.c b/io_uring/bpf.c index 6eb0c47b4aa9..8b7c74761c63 100644 --- a/io_uring/bpf.c +++ b/io_uring/bpf.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> +#include <linux/filter.h> + +#include "bpf.h" static const struct bpf_func_proto * io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -22,3 +25,63 @@ const struct bpf_verifier_ops bpf_io_uring_verifier_ops = { .get_func_proto = io_bpf_func_proto, .is_valid_access = io_bpf_is_valid_access, }; + +int io_run_bpf(struct io_ring_ctx *ctx) +{ + struct io_bpf_ctx *bc = ctx->bpf_ctx; + int ret; + + mutex_lock(&ctx->uring_lock); + ret = bpf_prog_run_pin_on_cpu(bc->prog, bc); + mutex_unlock(&ctx->uring_lock); + return ret; +} + +int io_unregister_bpf(struct io_ring_ctx *ctx) +{ + struct io_bpf_ctx *bc = ctx->bpf_ctx; + + if (!bc) + return -ENXIO; + bpf_prog_put(bc->prog); + kfree(bc); + ctx->bpf_ctx = NULL; + return 0; +} + +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg, + unsigned int nr_args) +{ + struct __user io_uring_bpf_reg *bpf_reg_usr = arg; + struct io_uring_bpf_reg bpf_reg; + struct io_bpf_ctx *bc; + struct bpf_prog *prog; + + if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) + return -EOPNOTSUPP; + + if (nr_args != 1) + return -EINVAL; + if (copy_from_user(&bpf_reg, bpf_reg_usr, sizeof(bpf_reg))) + return -EFAULT; + if (bpf_reg.flags || bpf_reg.resv1 || + bpf_reg.resv2[0] || bpf_reg.resv2[1]) + return -EINVAL; + + if (ctx->bpf_ctx) + return -ENXIO; + + bc = kzalloc(sizeof(*bc), GFP_KERNEL); + if (!bc) + return -ENOMEM; + + prog = bpf_prog_get_type(bpf_reg.prog_fd, BPF_PROG_TYPE_IOURING); + if (IS_ERR(prog)) { + kfree(bc); + return PTR_ERR(prog); + } + + bc->prog = prog; + ctx->bpf_ctx = bc; + return 0; +} diff --git a/io_uring/bpf.h b/io_uring/bpf.h new file mode 100644 index 000000000000..2b4e555ff07a --- /dev/null +++ b/io_uring/bpf.h @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef IOU_BPF_H +#define IOU_BPF_H + +#include <linux/io_uring/bpf.h> +#include <linux/io_uring_types.h> + +struct bpf_prog; + +struct io_bpf_ctx { + struct io_bpf_ctx_kern kern; + struct bpf_prog *prog; +}; + +static inline bool io_bpf_enabled(struct io_ring_ctx *ctx) +{ + return IS_ENABLED(CONFIG_BPF) && ctx->bpf_ctx != NULL; +} + +#ifdef CONFIG_BPF +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg, + unsigned int nr_args); +int io_unregister_bpf(struct io_ring_ctx *ctx); +int io_run_bpf(struct io_ring_ctx *ctx); + +#else +static inline int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg, + unsigned int nr_args) +{ + return -EOPNOTSUPP; +} +static inline int io_unregister_bpf(struct io_ring_ctx *ctx) +{ + return -EOPNOTSUPP; +} +static inline int io_run_bpf(struct io_ring_ctx *ctx) +{ +} +#endif + +#endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f34fa1ead2cf..82599e2a888a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -104,6 +104,7 @@ #include "rw.h" #include "alloc_cache.h" #include "eventfd.h" +#include "bpf.h" #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \ IOSQE_IO_HARDLINK | IOSQE_ASYNC) @@ -2834,6 +2835,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, io_napi_busy_loop(ctx, &iowq); + if (io_bpf_enabled(ctx)) { + ret = io_run_bpf(ctx); + if (ret == IOU_BPF_RET_STOP) + return 0; + } + trace_io_uring_cqring_wait(ctx, min_events); do { unsigned long check_cq; @@ -2879,6 +2886,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, if (ret < 0) break; + if (io_bpf_enabled(ctx)) { + ret = io_run_bpf(ctx); + if (ret == IOU_BPF_RET_STOP) + break; + continue; + } + check_cq = READ_ONCE(ctx->check_cq); if (unlikely(check_cq)) { /* let the caller flush overflows, retry */ @@ -3009,6 +3023,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_futex_cache_free(ctx); io_destroy_buffers(ctx); io_unregister_cqwait_reg(ctx); + io_unregister_bpf(ctx); mutex_unlock(&ctx->uring_lock); if (ctx->sq_creds) put_cred(ctx->sq_creds); diff --git a/io_uring/register.c b/io_uring/register.c index 45edfc57963a..2a8efeacf2db 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -30,6 +30,7 @@ #include "eventfd.h" #include "msg_ring.h" #include "memmap.h" +#include "bpf.h" #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ IORING_REGISTER_LAST + IORING_OP_LAST) @@ -846,6 +847,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_register_cqwait_reg(ctx, arg); break; + case IORING_REGISTER_BPF: + ret = -EINVAL; + if (!arg) + break; + ret = io_register_bpf(ctx, arg, nr_args); + break; default: ret = -EINVAL; break; -- 2.46.0