Most opcodes don't support using the ioprio field; it's really only something that read/write to storage media supports. Overlay a flags2 variable with ioprio, so we can grow our flags space. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io_uring.c | 40 ++++++++++++++++++++++++----------- include/uapi/linux/io_uring.h | 8 ++++++- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 29153958ea78..06afe4db5a9a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -108,7 +108,10 @@ IOSQE_IO_HARDLINK | IOSQE_ASYNC) #define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \ - IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) + IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS | \ + IOSQE_FLAGS2) + +#define SQE_VALID_FLAGS2 0 #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA) @@ -788,9 +791,10 @@ enum { REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, + REQ_F_FLAGS2_BIT = IOSQE_FLAGS2_BIT, - /* first byte is taken by user flags, shift it to not overlap */ - REQ_F_FAIL_BIT = 8, + /* first bits are taken by user flags, shift it to not overlap */ + REQ_F_FAIL_BIT = 9, REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, REQ_F_NOWAIT_BIT, @@ -831,6 +835,8 @@ enum { REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), /* IOSQE_CQE_SKIP_SUCCESS */ REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + /* IOSQE_FLAGS2 */ + REQ_F_FLAGS2 = BIT(REQ_F_FLAGS2_BIT), /* fail rest of links */ REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), @@ -3280,15 +3286,16 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) kiocb->ki_pos = READ_ONCE(sqe->off); - ioprio = READ_ONCE(sqe->ioprio); - if (ioprio) { - ret = ioprio_check_cap(ioprio); - if (ret) - return ret; + kiocb->ki_ioprio = get_current_ioprio(); + if (!(req->flags & REQ_F_FLAGS2)) { + ioprio = READ_ONCE(sqe->ioprio); + if (ioprio) { + ret = 
ioprio_check_cap(ioprio); + if (ret) + return ret; - kiocb->ki_ioprio = ioprio; - } else { - kiocb->ki_ioprio = get_current_ioprio(); + kiocb->ki_ioprio = ioprio; + } } req->imu = NULL; @@ -7779,6 +7786,14 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, return -EOPNOTSUPP; io_init_req_drain(req); } + if (sqe_flags & IOSQE_FLAGS2) { + unsigned int sqe_flags2; + + sqe_flags2 = READ_ONCE(sqe->flags2); + if (sqe_flags2 & ~SQE_VALID_FLAGS2) + return -EINVAL; + req->flags |= sqe_flags | (sqe_flags2 << 8U); + } } if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) { if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags)) @@ -7794,7 +7809,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, } } - if (!io_op_defs[opcode].ioprio && sqe->ioprio) + if (!io_op_defs[opcode].ioprio && sqe->ioprio && + !(req->flags & REQ_F_FLAGS2)) return -EINVAL; if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index fad63564678a..622f6e27a444 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -17,7 +17,10 @@ struct io_uring_sqe { __u8 opcode; /* type of operation for this sqe */ __u8 flags; /* IOSQE_ flags */ - __u16 ioprio; /* ioprio for the request */ + union { + __u16 ioprio; /* ioprio for the request */ + __u16 flags2; /* extra flags */ + }; __s32 fd; /* file descriptor to do IO on */ union { __u64 off; /* offset into file */ @@ -71,6 +74,7 @@ enum { IOSQE_ASYNC_BIT, IOSQE_BUFFER_SELECT_BIT, IOSQE_CQE_SKIP_SUCCESS_BIT, + IOSQE_FLAGS2_BIT, }; /* @@ -90,6 +94,8 @@ enum { #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) /* don't post CQE if request succeeded */ #define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) +/* flags2 is valid and set */ +#define IOSQE_FLAGS2 (1U << IOSQE_FLAGS2_BIT) /* * io_uring_setup() flags -- 2.35.1