From: Jens Axboe <axboe@xxxxxxxxx> Normal SQEs are 64-bytes in length, which is fine for all the commands we support. However, in preparation for supporting passthrough IO, provide an option for setting up a ring with 128-byte SQEs. We continue to use the same type for io_uring_sqe, it's marked and commented with a zero sized array pad at the end. This provides up to 80 bytes of data for a passthrough command - 64 bytes for the extra added data, and 16 bytes available at the end of the existing SQE. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io_uring.c | 13 ++++++++++--- include/uapi/linux/io_uring.h | 7 +++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a7412f6862fc..241ba1cd6dcf 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7431,8 +7431,12 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) * though the application is the one updating it. */ head = READ_ONCE(ctx->sq_array[sq_idx]); - if (likely(head < ctx->sq_entries)) + if (likely(head < ctx->sq_entries)) { + /* double index for 128-byte SQEs, twice as long */ + if (ctx->flags & IORING_SETUP_SQE128) + head <<= 1; return &ctx->sq_sqes[head]; + } /* drop invalid entries */ ctx->cq_extra--; @@ -10431,7 +10435,10 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, rings->sq_ring_entries = p->sq_entries; rings->cq_ring_entries = p->cq_entries; - size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); + if (p->flags & IORING_SETUP_SQE128) + size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries); + else + size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); if (size == SIZE_MAX) { io_mem_free(ctx->rings); ctx->rings = NULL; @@ -10643,7 +10650,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | - 
IORING_SETUP_R_DISABLED)) + IORING_SETUP_R_DISABLED | IORING_SETUP_SQE128)) return -EINVAL; return io_uring_create(entries, &p, params); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 787f491f0d2a..c5db68433ca5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -61,6 +61,12 @@ struct io_uring_sqe { __u32 file_index; }; __u64 __pad2[2]; + + /* + * If the ring is initialized with IORING_SETUP_SQE128, then this field + * contains 64 bytes of padding, doubling the size of the SQE. + */ + __u64 __big_sqe_pad[0]; }; enum { @@ -101,6 +107,7 @@ enum { #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ +#define IORING_SETUP_SQE128 (1U << 7) /* SQEs are 128b */ enum { IORING_OP_NOP, -- 2.25.1