On Wed, Feb 22, 2023 at 5:29 AM Xiaoguang Wang <xiaoguang.wang@xxxxxxxxxxxxxxxxx> wrote: > > Currently only one ebpf helper, bpf_ublk_queue_sqe(), is added; a ublksrv target > can use this helper to write an ebpf prog that supports ublk kernel & userspace > zero copy. Please see the ublksrv test code for more info. > > Signed-off-by: Xiaoguang Wang <xiaoguang.wang@xxxxxxxxxxxxxxxxx> > --- > drivers/block/ublk_drv.c | 263 +++++++++++++++++++++++++++++++-- > include/uapi/linux/bpf.h | 1 + > include/uapi/linux/ublk_cmd.h | 18 +++ > kernel/bpf/verifier.c | 3 +- > scripts/bpf_doc.py | 4 + > tools/include/uapi/linux/bpf.h | 9 ++ > 6 files changed, 286 insertions(+), 12 deletions(-) > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > index b628e9eaefa6..d17ddb6fc27f 100644 > --- a/drivers/block/ublk_drv.c > +++ b/drivers/block/ublk_drv.c > @@ -105,6 +105,12 @@ struct ublk_uring_cmd_pdu { > */ > #define UBLK_IO_FLAG_NEED_GET_DATA 0x08 > > +/* > + * UBLK_IO_FLAG_BPF is set if IO command has been handled by ebpf prog instead > + * of user space daemon. 
> + */ > +#define UBLK_IO_FLAG_BPF 0x10 > + > struct ublk_io { > /* userspace buffer address from io cmd */ > __u64 addr; > @@ -114,6 +120,11 @@ struct ublk_io { > struct io_uring_cmd *cmd; > }; > > +struct ublk_req_iter { > + struct io_fixed_iter fixed_iter; > + struct bio_vec *bvec; > +}; > + > struct ublk_queue { > int q_id; > int q_depth; > @@ -163,6 +174,9 @@ struct ublk_device { > unsigned int nr_queues_ready; > atomic_t nr_aborted_queues; > > + struct bpf_prog *io_bpf_prog; > + struct ublk_req_iter *iter_table; > + > /* > * Our ubq->daemon may be killed without any notification, so > * monitor each queue's daemon periodically > @@ -189,10 +203,48 @@ static DEFINE_MUTEX(ublk_ctl_mutex); > > static struct miscdevice ublk_misc; > > +struct ublk_io_bpf_ctx { > + struct ublk_bpf_ctx ctx; > + struct ublk_device *ub; > +}; > + > +static inline struct ublk_req_iter *ublk_get_req_iter(struct ublk_device *ub, > + int qid, int tag) > +{ > + return &(ub->iter_table[qid * ub->dev_info.queue_depth + tag]); > +} > + > +BPF_CALL_4(bpf_ublk_queue_sqe, struct ublk_io_bpf_ctx *, bpf_ctx, > + struct io_uring_sqe *, sqe, u32, sqe_len, u32, fd) > +{ > + struct ublk_req_iter *req_iter; > + u16 q_id = bpf_ctx->ctx.q_id; > + u16 tag = bpf_ctx->ctx.tag; > + > + req_iter = ublk_get_req_iter(bpf_ctx->ub, q_id, tag); > + io_uring_submit_sqe(fd, sqe, sqe_len, &(req_iter->fixed_iter)); > + return 0; > +} > + > +const struct bpf_func_proto ublk_bpf_queue_sqe_proto = { > + .func = bpf_ublk_queue_sqe, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_ANYTHING, > + .arg2_type = ARG_ANYTHING, > + .arg3_type = ARG_ANYTHING, > + .arg4_type = ARG_ANYTHING, > +}; You know that the above is unsafe, right? Every argument is declared ARG_ANYTHING, so the verifier accepts arbitrary values for bpf_ctx and sqe, yet the helper dereferences bpf_ctx and hands sqe on to io_uring_submit_sqe(). 
> + > static const struct bpf_func_proto * > ublk_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > { > - return bpf_base_func_proto(func_id); > + switch (func_id) { > + case BPF_FUNC_ublk_queue_sqe: > + return &ublk_bpf_queue_sqe_proto; > + default: > + return bpf_base_func_proto(func_id); > + } > } > > static bool ublk_bpf_is_valid_access(int off, int size, > @@ -200,6 +252,23 @@ static bool ublk_bpf_is_valid_access(int off, int size, > const struct bpf_prog *prog, > struct bpf_insn_access_aux *info) > { > + if (off < 0 || off >= sizeof(struct ublk_bpf_ctx)) > + return false; > + if (off % size != 0) > + return false; > + > + switch (off) { > + case offsetof(struct ublk_bpf_ctx, q_id): > + return size == sizeof_field(struct ublk_bpf_ctx, q_id); > + case offsetof(struct ublk_bpf_ctx, tag): > + return size == sizeof_field(struct ublk_bpf_ctx, tag); > + case offsetof(struct ublk_bpf_ctx, op): > + return size == sizeof_field(struct ublk_bpf_ctx, op); > + case offsetof(struct ublk_bpf_ctx, nr_sectors): > + return size == sizeof_field(struct ublk_bpf_ctx, nr_sectors); > + case offsetof(struct ublk_bpf_ctx, start_sector): > + return size == sizeof_field(struct ublk_bpf_ctx, start_sector); > + } > return false; We don't introduce stable 'ctx' anymore. Please see how hid-bpf is doing things.