Add the ability to send meta along with read/write. The meta is
represented by the newly introduced 'struct io_uring_meta', which
carries the meta type, flags, buffer, length and apptag.

The application sets up a SQE128 ring and prepares 'struct
io_uring_meta' within the SQE, at the offset pointed to by sqe->cmd.
The kernel processes this user-passed information to prepare a
uio_meta descriptor, which is passed down using kiocb->private.

Meta exchange is supported only for direct IO. Vectored read/write
operations with meta are not currently supported.

Signed-off-by: Anuj Gupta <anuj20.g@xxxxxxxxxxx>
Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx>
---
 include/linux/fs.h            |  1 +
 include/uapi/linux/io_uring.h | 30 +++++++++++++++-
 io_uring/io_uring.c           |  7 ++++
 io_uring/rw.c                 | 68 +++++++++++++++++++++++++++++++++--
 io_uring/rw.h                 |  9 ++++-
 5 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index db26b4a70c62..0132565288c2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -330,6 +330,7 @@ struct readahead_control;
 #define IOCB_NOIO		(1 << 20)
 /* can use bio alloc cache */
 #define IOCB_ALLOC_CACHE	(1 << 21)
+#define IOCB_HAS_META		(1 << 22)
 /*
  * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
  * iocb completion can be passed back to the owner for execution from a safe
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2aaf7ee256ac..9140c66b315b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -101,12 +101,40 @@ struct io_uring_sqe {
		__u64	optval;
		/*
		 * If the ring is initialized with IORING_SETUP_SQE128, then
-		 * this field is used for 80 bytes of arbitrary command data
+		 * this field is the starting offset for 80 bytes of data.
+		 * This data is opaque to the uring command op; for meta IO,
+		 * it contains 'struct io_uring_meta'.
		 */
		__u8	cmd[0];
	};
 };

+enum io_uring_sqe_meta_type_bits {
+	META_TYPE_INTEGRITY_BIT,
+	/* not a real meta type; just to make sure that we don't overflow */
+	META_TYPE_LAST_BIT,
+};
+
+/* meta type flags */
+#define META_TYPE_INTEGRITY	(1U << META_TYPE_INTEGRITY_BIT)
+
+struct io_uring_meta {
+	__u16	meta_type;
+	__u16	meta_flags;
+	__u32	meta_len;
+	__u64	meta_addr;
+	/* the next 64 bytes go to SQE128 */
+	__u16	apptag;
+	__u8	pad[62];
+};
+
+/*
+ * flags for integrity meta
+ */
+#define INTEGRITY_CHK_GUARD	(1U << 0)	/* enforce guard check */
+#define INTEGRITY_CHK_APPTAG	(1U << 1)	/* enforce app tag check */
+#define INTEGRITY_CHK_REFTAG	(1U << 2)	/* enforce ref tag check */
+
 /*
  * If sqe->file_index is set to this for opcodes that instantiate a new
  * direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7ed1e009aaec..0d26ee1193ca 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3704,6 +3704,13 @@ static int __init io_uring_init(void)
	/* top 8bits are for internal use */
	BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0);

+	BUILD_BUG_ON(sizeof(struct io_uring_meta) >
+		     2 * sizeof(struct io_uring_sqe) -
+		     offsetof(struct io_uring_sqe, cmd));
+
+	BUILD_BUG_ON(META_TYPE_LAST_BIT >
+		     8 * sizeof_field(struct io_uring_meta, meta_type));
+
	io_uring_optable_init();

	/*
diff --git a/io_uring/rw.c b/io_uring/rw.c
index c004d21e2f12..e8f5b5af4d2f 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -23,6 +23,8 @@
 #include "poll.h"
 #include "rw.h"

+#define INTEGRITY_VALID_FLAGS (INTEGRITY_CHK_GUARD | INTEGRITY_CHK_APPTAG | \
+			       INTEGRITY_CHK_REFTAG)
 struct io_rw {
	/* NOTE: kiocb has the file as the first member, so don't do it here */
	struct kiocb			kiocb;
@@ -247,6 +249,42 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
	return 0;
 }

+static int io_prep_rw_meta(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			   struct io_rw *rw, int ddir)
+{
+	const struct io_uring_meta *md = (const struct io_uring_meta *)sqe->cmd;
+	u16 meta_type = READ_ONCE(md->meta_type);
+	const struct io_issue_def *def;
+	struct io_async_rw *io;
+	int ret;
+
+	if (!meta_type)
+		return 0;
+	if (!(meta_type & META_TYPE_INTEGRITY))
+		return -EINVAL;
+
+	/* should fit into two bytes */
+	BUILD_BUG_ON(INTEGRITY_VALID_FLAGS >= (1 << 16));
+
+	def = &io_issue_defs[req->opcode];
+	if (def->vectored)
+		return -EOPNOTSUPP;
+
+	io = req->async_data;
+	io->meta.flags = READ_ONCE(md->meta_flags);
+	if (io->meta.flags & ~INTEGRITY_VALID_FLAGS)
+		return -EINVAL;
+
+	io->meta.apptag = READ_ONCE(md->apptag);
+	ret = import_ubuf(ddir, u64_to_user_ptr(READ_ONCE(md->meta_addr)),
+			  READ_ONCE(md->meta_len), &io->meta.iter);
+	if (unlikely(ret < 0))
+		return ret;
+	rw->kiocb.ki_flags |= IOCB_HAS_META;
+	iov_iter_save_state(&io->meta.iter, &io->iter_meta_state);
+	return ret;
+}
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
		      int ddir, bool do_import)
 {
@@ -269,11 +307,16 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
		rw->kiocb.ki_ioprio = get_current_ioprio();
	}
	rw->kiocb.dio_complete = NULL;
+	rw->kiocb.ki_flags = 0;

	rw->addr = READ_ONCE(sqe->addr);
	rw->len = READ_ONCE(sqe->len);
	rw->flags = READ_ONCE(sqe->rw_flags);
-	return io_prep_rw_setup(req, ddir, do_import);
+	ret = io_prep_rw_setup(req, ddir, do_import);
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_SQE128 && !ret))
+		ret = io_prep_rw_meta(req, sqe, rw, ddir);
+	return ret;
 }

 int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -400,7 +443,10 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 static void io_resubmit_prep(struct io_kiocb *req)
 {
	struct io_async_rw *io = req->async_data;
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);

+	if (unlikely(rw->kiocb.ki_flags & IOCB_HAS_META))
+		iov_iter_restore(&io->meta.iter, &io->iter_meta_state);
	iov_iter_restore(&io->iter, &io->iter_state);
 }

@@ -768,8 +814,12 @@ static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)

 static bool need_complete_io(struct io_kiocb *req)
 {
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+	/* Exclude meta IO as we don't support partial completion for that */
-	return req->flags & REQ_F_ISREG ||
-		S_ISBLK(file_inode(req->file)->i_mode);
+	return (req->flags & REQ_F_ISREG ||
+		S_ISBLK(file_inode(req->file)->i_mode)) &&
+		!(rw->kiocb.ki_flags & IOCB_HAS_META);
 }

 static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
@@ -786,7 +836,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
	if (!(req->flags & REQ_F_FIXED_FILE))
		req->flags |= io_file_get_flags(file);

-	kiocb->ki_flags = file->f_iocb_flags;
+	kiocb->ki_flags |= file->f_iocb_flags;
	ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
	if (unlikely(ret))
		return ret;
@@ -815,6 +865,14 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
		kiocb->ki_complete = io_complete_rw;
	}

+	if (unlikely(kiocb->ki_flags & IOCB_HAS_META)) {
+		struct io_async_rw *io = req->async_data;
+
+		if (!(req->file->f_flags & O_DIRECT))
+			return -EOPNOTSUPP;
+		kiocb->private = &io->meta;
+	}
+
	return 0;
 }

@@ -881,6 +939,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
	 * manually if we need to.
	 */
	iov_iter_restore(&io->iter, &io->iter_state);
+	if (unlikely(kiocb->ki_flags & IOCB_HAS_META))
+		iov_iter_restore(&io->meta.iter, &io->iter_meta_state);

	do {
		/*
@@ -1091,6 +1151,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
	} else {
 ret_eagain:
		iov_iter_restore(&io->iter, &io->iter_state);
+		if (unlikely(kiocb->ki_flags & IOCB_HAS_META))
+			iov_iter_restore(&io->meta.iter, &io->iter_meta_state);
		if (kiocb->ki_flags & IOCB_WRITE)
			io_req_end_write(req);
		return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..49944b539c51 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -9,7 +9,14 @@ struct io_async_rw {
	struct iovec			fast_iov;
	struct iovec			*free_iovec;
	int				free_iov_nr;
-	struct wait_page_queue		wpq;
+	/* wpq is for buffered io, while meta fields are used with direct io */
+	union {
+		struct wait_page_queue		wpq;
+		struct {
+			struct uio_meta		meta;
+			struct iov_iter_state	iter_meta_state;
+		};
+	};
 };

 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
--
2.25.1
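
For reference, below is a minimal, untested userspace sketch of the
intended flow, using standard liburing calls (io_uring_queue_init,
io_uring_prep_read, io_uring_submit, io_uring_wait_cqe). It assumes a
header carrying the uapi additions from this patch (struct
io_uring_meta, META_TYPE_INTEGRITY, the INTEGRITY_CHK_* flags), an fd
opened with O_DIRECT, and an illustrative geometry of one 4096-byte
data block with 8 bytes of metadata; read_with_meta() is a made-up
helper name, not part of any API.

#include <liburing.h>
#include <stdint.h>
#include <string.h>

/* Untested sketch: read 4096 bytes from 'fd' (opened with O_DIRECT)
 * while exchanging 8 bytes of integrity meta via 'meta_buf'. Sizes
 * are illustrative; real values depend on the device format.
 */
static int read_with_meta(int fd, void *buf, void *meta_buf)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_meta *md;
	int ret;

	/* meta sits in the second SQE half, so a SQE128 ring is required */
	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SQE128);
	if (ret)
		return ret;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, 4096, 0);

	/* describe the meta buffer at the offset pointed to by sqe->cmd */
	md = (struct io_uring_meta *)sqe->cmd;
	memset(md, 0, sizeof(*md));
	md->meta_type = META_TYPE_INTEGRITY;
	md->meta_flags = INTEGRITY_CHK_GUARD;	/* enforce guard check only */
	md->meta_addr = (__u64)(uintptr_t)meta_buf;
	md->meta_len = 8;

	io_uring_submit(&ring);
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		ret = cqe->res;
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return ret;
}

Note that detection is driven purely by a non-zero meta_type, so a
plain SQE128 read/write with a zeroed second SQE half behaves exactly
as before (io_prep_rw_meta() returns 0 early in that case).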