Define an io_uring_cmd_sqe struct that passthrough commands can use, and
define an array that has offset information for the two members that we
care about (user_data and personality). Then we can init the two command
types in basically the same way, just reading the user_data and
personality at the defined offsets for the command type.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c                 | 57 +++++++++++++++++++++++++++--------
 include/uapi/linux/io_uring.h | 10 ++++++
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 416e47832468..a4699b066172 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -824,6 +824,22 @@ struct io_defer_entry {
 	u32			seq;
 };
 
+struct sqe_offset {
+	unsigned char		user_data;
+	unsigned char		personality;
+};
+
+static struct sqe_offset sqe_offsets[] = {
+	{
+		.user_data	= offsetof(struct io_uring_sqe, user_data),
+		.personality	= offsetof(struct io_uring_sqe, personality)
+	},
+	{
+		.user_data	= offsetof(struct io_uring_cmd_sqe, user_data),
+		.personality	= offsetof(struct io_uring_cmd_sqe, personality)
+	}
+};
+
 struct io_op_def {
 	/* needs req->file assigned */
 	unsigned		needs_file : 1;
@@ -844,6 +860,8 @@ struct io_op_def {
 	unsigned		plug : 1;
 	/* size of async data needed, if any */
 	unsigned short		async_size;
+	/* offset definition for user_data/personality */
+	unsigned short		offsets;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -988,6 +1006,9 @@ static const struct io_op_def io_op_defs[] = {
 	},
 	[IORING_OP_RENAMEAT] = {},
 	[IORING_OP_UNLINKAT] = {},
+	[IORING_OP_URING_CMD] = {
+		.offsets	= 1,
+	},
 };
 
 static bool io_disarm_next(struct io_kiocb *req);
@@ -6384,16 +6405,21 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
 }
 
 static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
-		       const struct io_uring_sqe *sqe)
+		       const struct io_uring_sqe_hdr *hdr)
 {
 	struct io_submit_state *state;
+	const struct io_op_def *def;
 	unsigned int sqe_flags;
+	const __u64 *uptr;
+	const __u16 *pptr;
 	int personality, ret = 0;
 
-	req->opcode = READ_ONCE(sqe->hdr.opcode);
+	req->opcode = READ_ONCE(hdr->opcode);
+	def = &io_op_defs[req->opcode];
 	/* same numerical values with corresponding REQ_F_*, safe to copy */
-	req->flags = sqe_flags = READ_ONCE(sqe->hdr.flags);
-	req->user_data = READ_ONCE(sqe->user_data);
+	req->flags = sqe_flags = READ_ONCE(hdr->flags);
+	uptr = (const void *) hdr + sqe_offsets[def->offsets].user_data;
+	req->user_data = READ_ONCE(*uptr);
 	req->async_data = NULL;
 	req->file = NULL;
 	req->ctx = ctx;
@@ -6419,11 +6445,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
 		return -EACCES;
 
-	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
-	    !io_op_defs[req->opcode].buffer_select)
+	if ((sqe_flags & IOSQE_BUFFER_SELECT) && !def->buffer_select)
 		return -EOPNOTSUPP;
 
-	personality = READ_ONCE(sqe->personality);
+	pptr = (const void *) hdr + sqe_offsets[def->offsets].personality;
+	personality = READ_ONCE(*pptr);
 	if (personality) {
 		req->work.creds = xa_load(&ctx->personalities, personality);
 		if (!req->work.creds)
@@ -6436,17 +6462,15 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	 * Plug now if we have more than 1 IO left after this, and the target
 	 * is potentially a read/write to block based storage.
 	 */
-	if (!state->plug_started && state->ios_left > 1 &&
-	    io_op_defs[req->opcode].plug) {
+	if (!state->plug_started && state->ios_left > 1 && def->plug) {
 		blk_start_plug(&state->plug);
 		state->plug_started = true;
 	}
 
-	if (io_op_defs[req->opcode].needs_file) {
+	if (def->needs_file) {
 		bool fixed = req->flags & REQ_F_FIXED_FILE;
 
-		req->file = io_file_get(state, req, READ_ONCE(sqe->hdr.fd),
-					fixed);
+		req->file = io_file_get(state, req, READ_ONCE(hdr->fd), fixed);
 		if (unlikely(!req->file))
 			ret = -EBADF;
 	}
@@ -6461,7 +6485,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	struct io_submit_link *link = &ctx->submit_state.link;
 	int ret;
 
-	ret = io_init_req(ctx, req, sqe);
+	ret = io_init_req(ctx, req, &sqe->hdr);
 	if (unlikely(ret)) {
 fail_req:
 		io_req_complete_failed(req, ret);
@@ -9915,6 +9939,7 @@ static int __init io_uring_init(void)
 #define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
 	__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
 	BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
+	BUILD_BUG_ON(sizeof(struct io_uring_cmd_sqe) != 64);
 	BUILD_BUG_SQE_ELEM(0, __u8, hdr.opcode);
 	BUILD_BUG_SQE_ELEM(1, __u8, hdr.flags);
 	BUILD_BUG_SQE_ELEM(2, __u16, hdr.ioprio);
@@ -9943,6 +9968,12 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
 	BUILD_BUG_SQE_ELEM(42, __u16, personality);
 	BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+#define BUILD_BUG_SQEC_ELEM(eoffset, etype, ename) \
+	__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_cmd_sqe, eoffset, etype, ename)
+	BUILD_BUG_SQEC_ELEM(8, __u64, user_data);
+	BUILD_BUG_SQEC_ELEM(18, __u16, personality);
+	BUILD_BUG_SQEC_ELEM(sqe_offsets[1].user_data, __u64, user_data);
+	BUILD_BUG_SQEC_ELEM(sqe_offsets[1].personality, __u16, personality);
 
 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
 	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5609474ccd9f..165ac406f00b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -74,6 +74,15 @@ struct io_uring_sqe {
 	};
 };
 
+struct io_uring_cmd_sqe {
+	struct io_uring_sqe_hdr	hdr;
+	__u64			user_data;
+	__u16			op;
+	__u16			personality;
+	__u32			len;
+	__u64			pdu[5];
+};
+
 enum {
 	IOSQE_FIXED_FILE_BIT,
 	IOSQE_IO_DRAIN_BIT,
@@ -148,6 +157,7 @@ enum {
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
 	IORING_OP_UNLINKAT,
+	IORING_OP_URING_CMD,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.31.0
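For reference, here is a minimal userspace sketch of the offset-table idea the
patch uses: both SQE layouts share a common header, and a small table records
where user_data and personality live in each layout, so one init path can read
them for either command type. The struct layouts and the sqe_hdr/sqe/cmd_sqe/
init_req names below are simplified stand-ins for illustration only, not the
real uapi definitions or kernel code.

/* sketch of the per-layout offset dispatch, with stand-in structs */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sqe_hdr {		/* shared header, stand-in for io_uring_sqe_hdr */
	uint8_t  opcode;
	uint8_t  flags;
	uint16_t ioprio;
	int32_t  fd;
};

struct sqe {			/* stand-in for the regular SQE layout */
	struct sqe_hdr hdr;
	uint64_t off;
	uint64_t addr;
	uint32_t len;
	uint32_t opcode_flags;
	uint64_t user_data;
	uint16_t buf_index;
	uint16_t personality;
};

struct cmd_sqe {		/* stand-in for the passthrough command layout */
	struct sqe_hdr hdr;
	uint64_t user_data;
	uint16_t op;
	uint16_t personality;
	uint32_t len;
	uint64_t pdu[5];
};

struct sqe_offset {
	unsigned char user_data;
	unsigned char personality;
};

/* index 0: regular SQE layout, index 1: passthrough command layout */
static const struct sqe_offset sqe_offsets[] = {
	{ offsetof(struct sqe, user_data),     offsetof(struct sqe, personality) },
	{ offsetof(struct cmd_sqe, user_data), offsetof(struct cmd_sqe, personality) },
};

/* read the two fields through the table, as io_init_req() does in the patch */
static void init_req(const void *hdr, int offsets,
		     uint64_t *user_data, uint16_t *personality)
{
	const char *base = hdr;

	*user_data   = *(const uint64_t *)(base + sqe_offsets[offsets].user_data);
	*personality = *(const uint16_t *)(base + sqe_offsets[offsets].personality);
}

int main(void)
{
	struct cmd_sqe c = { .user_data = 0xdeadbeef, .personality = 3 };
	uint64_t ud;
	uint16_t pers;

	/* a command-type SQE uses offset table index 1 */
	init_req(&c, 1, &ud, &pers);
	printf("user_data=0x%llx personality=%u\n",
	       (unsigned long long)ud, (unsigned)pers);
	return 0;
}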