On 7/7/20 11:29 PM, Xiaoguang Wang wrote: > I modify above test program a bit: > #include <errno.h> > #include <stdio.h> > #include <unistd.h> > #include <stdlib.h> > #include <string.h> > #include <fcntl.h> > #include <assert.h> > > #include "liburing.h" > > static void test_cq_overflow(struct io_uring *ring) > { > struct io_uring_cqe *cqe; > struct io_uring_sqe *sqe; > int issued = 0; > int ret = 0; > int i; > > for (i = 0; i < 33; i++) { > sqe = io_uring_get_sqe(ring); > if (!sqe) { > fprintf(stderr, "get sqe failed\n"); > break;; > } > ret = io_uring_submit(ring); > if (ret <= 0) { > if (ret != -EBUSY) > fprintf(stderr, "sqe submit failed: %d\n", ret); > break; > } > issued++; > } > > printf("issued requests: %d\n", issued); > > while (issued) { > ret = io_uring_peek_cqe(ring, &cqe); > if (ret) { > if (ret != -EAGAIN) { > fprintf(stderr, "peek completion failed: %s\n", > strerror(ret)); > break; > } > printf("left requets: %d %d\n", issued, IO_URING_READ_ONCE(*ring->sq.kflags)); > continue; > } > io_uring_cqe_seen(ring, cqe); > issued--; > printf("left requets: %d\n", issued); > } > } > > int main(int argc, char *argv[]) > { > int ret; > struct io_uring ring; > > ret = io_uring_queue_init(16, &ring, 0); > if (ret) { > fprintf(stderr, "ring setup failed: %d\n", ret); > return 1; > } > > test_cq_overflow(&ring); > return 0; > } > > Though with your patches applied, we still can not peek the last cqe. > This test program will only issue 33 sqes, so it won't get EBUSY error. How about we make this even simpler, then - make the IORING_SQ_CQ_OVERFLOW actually track the state, rather than when we fail on submission. The liburing change would be the same, the kernel side would then look like the below. diff --git a/fs/io_uring.c b/fs/io_uring.c index 4c9a494c9f9f..01981926cdf4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) if (cqe) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->sq_check_overflow); set_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; } req->flags |= REQ_F_OVERFLOW; refcount_inc(&req->refs); @@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data) } /* Tell userspace we may need a wakeup call */ + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; - /* make sure to read SQ tail after writing flags */ - smp_mb(); + spin_unlock_irq(&ctx->completion_lock); to_submit = io_sqring_entries(ctx); if (!to_submit || ret == -EBUSY) { @@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data) } finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); } mutex_lock(&ctx->uring_lock); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8d033961cb78..91953b543125 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -198,6 +198,7 @@ struct io_sqring_offsets { * sq_ring->flags */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* app needs to enter kernel */ struct io_cqring_offsets { __u32 head; -- Jens Axboe