Add ublk bpf aio kfuncs for bpf prog to do: - prepare buffer - assign bpf aio struct_ops - submit bpf aios for handle ublk io command - deal with ublk io and bpf aio lifetime, and make sure that ublk io won't be completed until all bpf aios are completed Signed-off-by: Ming Lei <tom.leiming@xxxxxxxxx> --- drivers/block/ublk/bpf.c | 77 ++++++++++++++++++++++++++++++++++++ drivers/block/ublk/bpf_aio.c | 6 ++- drivers/block/ublk/bpf_aio.h | 38 +++++++++++++++++- drivers/block/ublk/ublk.h | 2 + 4 files changed, 121 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk/bpf.c b/drivers/block/ublk/bpf.c index 921bbbcf4d9e..c0babf6d5868 100644 --- a/drivers/block/ublk/bpf.c +++ b/drivers/block/ublk/bpf.c @@ -228,6 +228,77 @@ ublk_bpf_complete_io(struct ublk_bpf_io *io, int res) ublk_bpf_complete_io_cmd(io, res); } +/* + * Called before submitting one bpf aio in prog, and this ublk IO's + * reference is increased. + * + * Grab reference of `io` for this `aio`, and the reference will be dropped + * by ublk_bpf_dettach_and_complete_aio() + */ +__bpf_kfunc int +ublk_bpf_attach_and_prep_aio(const struct ublk_bpf_io *_io, unsigned off, + unsigned bytes, struct bpf_aio *aio) +{ + struct ublk_bpf_io *io = (struct ublk_bpf_io *)_io; + const struct request *req; + const struct ublk_rq_data *data; + const struct ublk_bpf_io *bpf_io; + + if (!io || !aio) + return -EINVAL; + + req = ublk_bpf_get_req(io); + if (!req) + return -EINVAL; + + if (off + bytes > blk_rq_bytes(req)) + return -EINVAL; + + if (req->mq_hctx) { + const struct ublk_queue *ubq = req->mq_hctx->driver_data; + + bpf_aio_assign_cb(aio, ubq->bpf_aio_ops); + } + + data = blk_mq_rq_to_pdu((struct request *)req); + bpf_io = &data->bpf_data; + bpf_aio_assign_buf(aio, &bpf_io->buf, off, bytes); + + refcount_inc(&io->ref); + aio->private_data = (void *)io; + + return 0; +} + +/* + * Called after this attached aio is completed, and the associated ublk IO's + * reference is decreased, and if the reference is dropped to zero, complete + * this ublk IO. + * + * Return -EIOCBQUEUED if this `io` is being handled, and 0 is returned + * if it can be completed now. + */ +__bpf_kfunc void +ublk_bpf_dettach_and_complete_aio(struct bpf_aio *aio) +{ + struct ublk_bpf_io *io = aio->private_data; + + if (io) { + ublk_bpf_io_dec_ref(io); + aio->private_data = NULL; + } +} + +__bpf_kfunc struct ublk_bpf_io *ublk_bpf_acquire_io_from_aio(struct bpf_aio *aio) +{ + return aio->private_data; +} + +__bpf_kfunc void ublk_bpf_release_io_from_aio(struct ublk_bpf_io *io) +{ +} + + BTF_KFUNCS_START(ublk_bpf_kfunc_ids) BTF_ID_FLAGS(func, ublk_bpf_complete_io, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, ublk_bpf_get_iod, KF_TRUSTED_ARGS | KF_RET_NULL) @@ -240,6 +311,12 @@ BTF_ID_FLAGS(func, bpf_aio_alloc, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_aio_alloc_sleepable, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_aio_release) BTF_ID_FLAGS(func, bpf_aio_submit) + +/* ublk bpf aio kfuncs */ +BTF_ID_FLAGS(func, ublk_bpf_attach_and_prep_aio) +BTF_ID_FLAGS(func, ublk_bpf_dettach_and_complete_aio) +BTF_ID_FLAGS(func, ublk_bpf_acquire_io_from_aio, KF_ACQUIRE) +BTF_ID_FLAGS(func, ublk_bpf_release_io_from_aio, KF_RELEASE) BTF_KFUNCS_END(ublk_bpf_kfunc_ids) __bpf_kfunc void bpf_aio_release_dtor(void *aio) diff --git a/drivers/block/ublk/bpf_aio.c b/drivers/block/ublk/bpf_aio.c index da050be4b710..06a6cc8f38b1 100644 --- a/drivers/block/ublk/bpf_aio.c +++ b/drivers/block/ublk/bpf_aio.c @@ -211,6 +211,7 @@ __bpf_kfunc void bpf_aio_release(struct bpf_aio *aio) __bpf_kfunc int bpf_aio_submit(struct bpf_aio *aio, int fd, loff_t pos, unsigned bytes, unsigned io_flags) { + unsigned op = bpf_aio_get_op(aio); struct file *file; /* @@ -220,6 +221,9 @@ __bpf_kfunc int bpf_aio_submit(struct bpf_aio *aio, int fd, loff_t pos, if (!aio->ops) return -EINVAL; + if (unlikely((bytes > aio->buf_size) && bpf_aio_is_rw(op))) + return -EINVAL; + file = fget(fd); if (!file) return -EINVAL; @@ -232,7 +236,7 @@ __bpf_kfunc int bpf_aio_submit(struct bpf_aio *aio, int fd, loff_t pos, aio->iocb.ki_filp = file; aio->iocb.ki_flags = io_flags; aio->bytes = bytes; - if (bpf_aio_is_rw(bpf_aio_get_op(aio))) { + if (bpf_aio_is_rw(op)) { if (file->f_flags & O_DIRECT) aio->iocb.ki_flags |= IOCB_DIRECT; else diff --git a/drivers/block/ublk/bpf_aio.h b/drivers/block/ublk/bpf_aio.h index d144c5e20dcb..0683139f5354 100644 --- a/drivers/block/ublk/bpf_aio.h +++ b/drivers/block/ublk/bpf_aio.h @@ -40,11 +40,15 @@ struct bpf_aio_buf { struct bpf_aio { unsigned int opf; - unsigned int bytes; + union { + unsigned int bytes; + unsigned int buf_size; + }; struct bpf_aio_buf buf; struct bpf_aio_work *work; const struct bpf_aio_complete_ops *ops; struct kiocb iocb; + void *private_data; }; typedef void (*bpf_aio_complete_t)(struct bpf_aio *io, long ret); @@ -68,6 +72,38 @@ static inline unsigned int bpf_aio_get_op(const struct bpf_aio *aio) return aio->opf & BPF_AIO_OP_MASK; } +/* Must be called from kfunc defined in consumer subsystem */ +static inline void bpf_aio_assign_cb(struct bpf_aio *aio, + const struct bpf_aio_complete_ops *ops) +{ + aio->ops = ops; +} + +/* + * Skip `skip` bytes and assign the advanced source buffer for `aio`, so + * we can cover this part of source buffer by this `aio` + */ +static inline void bpf_aio_assign_buf(struct bpf_aio *aio, + const struct bpf_aio_buf *src, unsigned skip, + unsigned bytes) +{ + const struct bio_vec *bvec, *end; + struct bpf_aio_buf *abuf = &aio->buf; + + skip += src->bvec_off; + for (bvec = src->bvec, end = bvec + src->nr_bvec; bvec < end; bvec++) { + if (likely(skip < bvec->bv_len)) + break; + skip -= bvec->bv_len; + } + + aio->buf_size = bytes; + abuf->bvec_off = skip; + abuf->nr_bvec = src->nr_bvec - (bvec - src->bvec); + abuf->bvec = bvec; +} + + int bpf_aio_init(void); int bpf_aio_struct_ops_init(void); struct bpf_aio *bpf_aio_alloc(unsigned int op, enum bpf_aio_flag aio_flags); diff --git a/drivers/block/ublk/ublk.h b/drivers/block/ublk/ublk.h index 2c33f6a94bf2..4bd04512c894 100644 --- a/drivers/block/ublk/ublk.h +++ b/drivers/block/ublk/ublk.h @@ -8,6 +8,7 @@ #include <uapi/linux/ublk_cmd.h> #include "bpf_reg.h" +#include "bpf_aio.h" #define UBLK_MINORS (1U << MINORBITS) @@ -47,6 +48,7 @@ struct ublk_bpf_io { unsigned long flags; refcount_t ref; int res; + struct bpf_aio_buf buf; }; struct ublk_rq_data { -- 2.47.0