Add the ability to place attributes inline within the SQE. Carve a new field that can accommodate 16 attribute flags: __u16 attr_inline_flags; Currently ATTR_FLAG_PI is defined, and future flags can be OR-ed to specify the attributes that are placed inline. When ATTR_FLAG_PI is passed, the application should also set up an SQE128 ring and place PI information (i.e., struct io_uring_attr_pi) in the first 32 bytes of the second SQE. Signed-off-by: Anuj Gupta <anuj20.g@xxxxxxxxxxx> Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx> --- include/uapi/linux/io_uring.h | 13 +++++++++++- io_uring/io_uring.c | 6 +++++- io_uring/rw.c | 38 ++++++++++++++++++++++++++++++++--- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2e6808f6ba28..9c290c16e543 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -93,9 +93,11 @@ struct io_uring_sqe { __u16 __pad3[1]; }; struct { + /* used when extra attribute is passed inline in SQE/SQE128 */ + __u16 attr_inline_flags; /* number of elements in the attribute vector */ __u8 nr_attr_indirect; - __u8 __pad4[3]; + __u8 __pad4[1]; }; }; union { @@ -126,6 +128,8 @@ struct io_uring_attr_vec { __u64 addr; }; +/* sqe->attr_inline_flags */ +#define ATTR_FLAG_PI (1U << ATTR_TYPE_PI) /* PI attribute information */ struct io_uring_attr_pi { __u16 flags; @@ -136,6 +140,13 @@ struct io_uring_attr_pi { __u64 rsvd; }; +/* Second half of SQE128 for IORING_OP_READ/WRITE */ +struct io_uring_sqe_ext { + /* if sqe->attr_inline_flags has ATTR_FLAG_PI, first 32 bytes are for PI */ + struct io_uring_attr_pi rw_pi; + __u64 rsvd1[4]; +}; + /* * If sqe->file_index is set to this for opcodes that instantiate a new * direct descriptor (like openat/openat2/accept), then io_uring will allocate diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index e32dd118d7c8..3f975befe82e 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3866,8 +3866,9 @@ static int __init
io_uring_init(void) BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); BUILD_BUG_SQE_ELEM(44, __u32, file_index); BUILD_BUG_SQE_ELEM(44, __u16, addr_len); + BUILD_BUG_SQE_ELEM(44, __u16, attr_inline_flags); BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); - BUILD_BUG_SQE_ELEM(44, __u8, nr_attr_indirect); + BUILD_BUG_SQE_ELEM(46, __u8, nr_attr_indirect); BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); BUILD_BUG_SQE_ELEM(56, __u64, __pad2); @@ -3894,6 +3895,9 @@ static int __init io_uring_init(void) /* top 8bits are for internal use */ BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0); + BUILD_BUG_ON(sizeof(struct io_uring_sqe_ext) != + sizeof(struct io_uring_sqe)); + io_uring_optable_init(); /* diff --git a/io_uring/rw.c b/io_uring/rw.c index 93d7451b9370..d2d403ca6eb3 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -269,6 +269,11 @@ static inline void io_meta_restore(struct io_async_rw *io) iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta); } +static inline const void *io_uring_sqe_ext(const struct io_uring_sqe *sqe) +{ + return (sqe + 1); +} + static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir, const struct io_uring_attr_pi *pi_attr) { @@ -343,11 +348,34 @@ static int io_prep_attr_vec(struct io_kiocb *req, struct io_rw *rw, int ddir, return 0; } +static int io_prep_inline_attr(struct io_kiocb *req, struct io_rw *rw, + const struct io_uring_sqe *sqe, int ddir, + u16 attr_flags) +{ + const struct io_uring_sqe_ext *sqe_ext; + const struct io_uring_attr_pi *pi_attr; + + if (!(attr_flags & ATTR_FLAG_PI)) + return -EINVAL; + + if (!(req->ctx->flags & IORING_SETUP_SQE128)) + return -EINVAL; + + sqe_ext = io_uring_sqe_ext(sqe); + if (READ_ONCE(sqe_ext->rsvd1[0]) || READ_ONCE(sqe_ext->rsvd1[1]) + || READ_ONCE(sqe_ext->rsvd1[2]) || READ_ONCE(sqe_ext->rsvd1[3])) + return -EINVAL; + + pi_attr = &sqe_ext->rw_pi; + return io_prep_rw_pi(req, rw, ddir, pi_attr); +} + static int io_prep_rw(struct io_kiocb *req, const struct 
io_uring_sqe *sqe, int ddir, bool do_import) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned ioprio; + u16 attr_flags; u8 nr_attr_indirect; int ret; @@ -376,12 +404,16 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (unlikely(ret)) return ret; + attr_flags = READ_ONCE(sqe->attr_inline_flags); nr_attr_indirect = READ_ONCE(sqe->nr_attr_indirect); - if (nr_attr_indirect) { + if (attr_flags) { + if (READ_ONCE(sqe->__pad4[0]) || nr_attr_indirect) + return -EINVAL; + ret = io_prep_inline_attr(req, rw, sqe, ddir, attr_flags); + } else if (nr_attr_indirect) { u64 attr_vec_usr_addr = READ_ONCE(sqe->attr_vec_addr); - if (READ_ONCE(sqe->__pad4[0]) || READ_ONCE(sqe->__pad4[1]) || - READ_ONCE(sqe->__pad4[2])) + if (READ_ONCE(sqe->__pad4[0])) return -EINVAL; ret = io_prep_attr_vec(req, rw, ddir, attr_vec_usr_addr, -- 2.25.1