On Thu, Apr 21, 2022 at 3:54 PM Stefan Roesch <shr@xxxxxx> wrote:
>
> Modify accesses to the CQE array to take large CQEs into account. The
> index needs to be shifted by one for large CQEs.
>
> Signed-off-by: Stefan Roesch <shr@xxxxxx>
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  fs/io_uring.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index c93a9353c88d..bd352815b9e7 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1909,8 +1909,12 @@ static noinline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
>  {
>  	struct io_rings *rings = ctx->rings;
>  	unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
> +	unsigned int shift = 0;
>  	unsigned int free, queued, len;
>
> +	if (ctx->flags & IORING_SETUP_CQE32)
> +		shift = 1;
> +
>  	/* userspace may cheat modifying the tail, be safe and do min */
>  	queued = min(__io_cqring_events(ctx), ctx->cq_entries);
>  	free = ctx->cq_entries - queued;
> @@ -1922,12 +1926,13 @@ static noinline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
>  	ctx->cached_cq_tail++;
>  	ctx->cqe_cached = &rings->cqes[off];
>  	ctx->cqe_sentinel = ctx->cqe_cached + len;
> -	return ctx->cqe_cached++;
> +	ctx->cqe_cached++;
> +	return &rings->cqes[off << shift];
>  }
>
>  static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
>  {
> -	if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
> +	if (likely(ctx->cqe_cached < ctx->cqe_sentinel && !(ctx->flags & IORING_SETUP_CQE32))) {
>  		ctx->cached_cq_tail++;
>  		return ctx->cqe_cached++;
>  	}

This excludes CQE caching for 32-byte CQEs. How about something like the
below to keep caching enabled for them, adding an io_get_cqe32() variant
for the big-CQE ring:

+static noinline struct io_uring_cqe *__io_get_cqe32(struct io_ring_ctx *ctx)
+{
+	struct io_rings *rings = ctx->rings;
+	unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
+	unsigned int free, queued, len;
+
+	/* userspace may cheat modifying the tail, be safe and do min */
+	queued = min(__io_cqring_events(ctx), ctx->cq_entries);
+	free = ctx->cq_entries - queued;
+	/* we need a contiguous range, limit based on the current array offset */
+	len = min(free, ctx->cq_entries - off);
+	if (!len)
+		return NULL;
+
+	ctx->cached_cq_tail++;
+	/* double the index, each 32-byte CQE spans two array slots */
+	ctx->cqe_cached = &rings->cqes[off << 1];
+	ctx->cqe_sentinel = ctx->cqe_cached + (len << 1);
+	return ctx->cqe_cached;
+}
+
+static inline struct io_uring_cqe *io_get_cqe32(struct io_ring_ctx *ctx)
+{
+	struct io_uring_cqe *cqe32;
+
+	if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
+		ctx->cached_cq_tail++;
+		cqe32 = ctx->cqe_cached;
+	} else {
+		cqe32 = __io_get_cqe32(ctx);
+	}
+	/* double increment for 32-byte CQEs */
+	ctx->cqe_cached += 2;
+	return cqe32;
+}
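
As an aside, the indexing rule both versions rely on can be shown with a
small userspace-style sketch. The cqe_at() helper below is hypothetical,
not part of either patch; it only illustrates that with IORING_SETUP_CQE32
each completion occupies two struct io_uring_cqe slots, so ring slot `off`
lives at array index `off << 1`:

	#include <stdbool.h>
	#include <linux/io_uring.h>

	/*
	 * Hypothetical illustration only: map a CQ ring slot to its
	 * position in the cqes[] array. With IORING_SETUP_CQE32 a
	 * completion is 32 bytes, i.e. two struct io_uring_cqe entries,
	 * hence the shift by one.
	 */
	static inline struct io_uring_cqe *
	cqe_at(struct io_uring_cqe *cqes, unsigned int off, bool big_cqe)
	{
		return &cqes[off << (big_cqe ? 1 : 0)];
	}

That doubled stride is also why io_get_cqe32() above advances
ctx->cqe_cached by two per returned completion.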