On 7/8/20 9:39 AM, Xiaoguang Wang wrote: > hi, > >> On 7/7/20 11:29 PM, Xiaoguang Wang wrote: >>> I modify above test program a bit: >>> #include <errno.h> >>> #include <stdio.h> >>> #include <unistd.h> >>> #include <stdlib.h> >>> #include <string.h> >>> #include <fcntl.h> >>> #include <assert.h> >>> >>> #include "liburing.h" >>> >>> static void test_cq_overflow(struct io_uring *ring) >>> { >>> struct io_uring_cqe *cqe; >>> struct io_uring_sqe *sqe; >>> int issued = 0; >>> int ret = 0; >>> int i; >>> >>> for (i = 0; i < 33; i++) { >>> sqe = io_uring_get_sqe(ring); >>> if (!sqe) { >>> fprintf(stderr, "get sqe failed\n"); >>> break;; >>> } >>> ret = io_uring_submit(ring); >>> if (ret <= 0) { >>> if (ret != -EBUSY) >>> fprintf(stderr, "sqe submit failed: %d\n", ret); >>> break; >>> } >>> issued++; >>> } >>> >>> printf("issued requests: %d\n", issued); >>> >>> while (issued) { >>> ret = io_uring_peek_cqe(ring, &cqe); >>> if (ret) { >>> if (ret != -EAGAIN) { >>> fprintf(stderr, "peek completion failed: %s\n", >>> strerror(ret)); >>> break; >>> } >>> printf("left requets: %d %d\n", issued, IO_URING_READ_ONCE(*ring->sq.kflags)); >>> continue; >>> } >>> io_uring_cqe_seen(ring, cqe); >>> issued--; >>> printf("left requets: %d\n", issued); >>> } >>> } >>> >>> int main(int argc, char *argv[]) >>> { >>> int ret; >>> struct io_uring ring; >>> >>> ret = io_uring_queue_init(16, &ring, 0); >>> if (ret) { >>> fprintf(stderr, "ring setup failed: %d\n", ret); >>> return 1; >>> } >>> >>> test_cq_overflow(&ring); >>> return 0; >>> } >>> >>> Though with your patches applied, we still can not peek the last cqe. >>> This test program will only issue 33 sqes, so it won't get EBUSY error. >> >> How about we make this even simpler, then - make the >> IORING_SQ_CQ_OVERFLOW actually track the state, rather than when we fail >> on submission. The liburing change would be the same, the kernel side >> would then look like the below. >> >> >> diff --git a/fs/io_uring.c b/fs/io_uring.c >> index 4c9a494c9f9f..01981926cdf4 100644 >> --- a/fs/io_uring.c >> +++ b/fs/io_uring.c >> @@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) >> if (cqe) { >> clear_bit(0, &ctx->sq_check_overflow); >> clear_bit(0, &ctx->cq_check_overflow); >> + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; >> } >> spin_unlock_irqrestore(&ctx->completion_lock, flags); >> io_cqring_ev_posted(ctx); >> @@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) >> if (list_empty(&ctx->cq_overflow_list)) { >> set_bit(0, &ctx->sq_check_overflow); >> set_bit(0, &ctx->cq_check_overflow); >> + ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; >> } >> req->flags |= REQ_F_OVERFLOW; >> refcount_inc(&req->refs); >> @@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data) >> } >> >> /* Tell userspace we may need a wakeup call */ >> + spin_lock_irq(&ctx->completion_lock); >> ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; >> - /* make sure to read SQ tail after writing flags */ >> - smp_mb(); >> + spin_unlock_irq(&ctx->completion_lock); >> >> to_submit = io_sqring_entries(ctx); >> if (!to_submit || ret == -EBUSY) { >> @@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data) >> } >> finish_wait(&ctx->sqo_wait, &wait); >> >> + spin_lock_irq(&ctx->completion_lock); >> ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; >> + spin_unlock_irq(&ctx->completion_lock); >> } >> >> mutex_lock(&ctx->uring_lock); >> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >> index 8d033961cb78..91953b543125 100644 >> --- a/include/uapi/linux/io_uring.h >> +++ b/include/uapi/linux/io_uring.h >> @@ -198,6 +198,7 @@ struct io_sqring_offsets { >> * sq_ring->flags >> */ >> #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ >> +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* app needs to enter kernel */ >> >> struct io_cqring_offsets { >> __u32 head; >> > Looks good, I think it'll work now. > I'll test and send patches soon, thanks. One missing bit clear, and I corrected that last comment. Just base on this one, thanks! diff --git a/fs/io_uring.c b/fs/io_uring.c index 4c9a494c9f9f..0b6a4b2d7e76 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) if (cqe) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->sq_check_overflow); set_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; } req->flags |= REQ_F_OVERFLOW; refcount_inc(&req->refs); @@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data) } /* Tell userspace we may need a wakeup call */ + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; - /* make sure to read SQ tail after writing flags */ - smp_mb(); + spin_unlock_irq(&ctx->completion_lock); to_submit = io_sqring_entries(ctx); if (!to_submit || ret == -EBUSY) { @@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data) } finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); } mutex_lock(&ctx->uring_lock); @@ -7810,6 +7814,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, if (list_empty(&ctx->cq_overflow_list)) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irq(&ctx->completion_lock); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8d033961cb78..d65fde732518 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -198,6 +198,7 @@ struct io_sqring_offsets { * sq_ring->flags */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ struct io_cqring_offsets { __u32 head; -- Jens Axboe