Re: [PATCH] io_uring: export cq overflow status to userspace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



hi,

On 7/8/20 9:39 AM, Xiaoguang Wang wrote:
hi,

On 7/7/20 11:29 PM, Xiaoguang Wang wrote:
I modify above test program a bit:
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "liburing.h"

/*
 * Submit more requests (33) than the CQ ring can hold for a 16-entry ring,
 * then drain completions, printing the overflow flag while the kernel still
 * holds overflowed CQEs.  Demonstrates whether userspace can observe the
 * IORING_SQ_CQ_OVERFLOW state via the sq ring flags.
 *
 * NOTE(review): the sqes obtained from io_uring_get_sqe() are submitted
 * without being prepped (no io_uring_prep_nop()) — presumably intentional
 * for this reproducer; confirm against the thread context.
 */
static void test_cq_overflow(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	int issued = 0;
	int ret = 0;
	int i;

	for (i = 0; i < 33; i++) {
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "get sqe failed\n");
			break;	/* was "break;;" — stray extra semicolon */
		}
		ret = io_uring_submit(ring);
		if (ret <= 0) {
			/* -EBUSY just means the CQ side is backed up */
			if (ret != -EBUSY)
				fprintf(stderr, "sqe submit failed: %d\n", ret);
			break;
		}
		issued++;
	}

	printf("issued requests: %d\n", issued);

	while (issued) {
		ret = io_uring_peek_cqe(ring, &cqe);
		if (ret) {
			if (ret != -EAGAIN) {
				/*
				 * liburing returns -errno; negate before
				 * handing it to strerror() (was strerror(ret)).
				 */
				fprintf(stderr, "peek completion failed: %s\n",
					strerror(-ret));
				break;
			}
			/* Show the sq ring flags so overflow state is visible. */
			printf("left requests: %d %d\n", issued,
			       IO_URING_READ_ONCE(*ring->sq.kflags));
			continue;
		}
		io_uring_cqe_seen(ring, cqe);
		issued--;
		printf("left requests: %d\n", issued);
	}
}

/*
 * Set up a small (16-entry) ring so the 33 submissions in
 * test_cq_overflow() are guaranteed to overflow the CQ ring,
 * then run the reproducer and tear the ring down.
 */
int main(int argc, char *argv[])
{
	int ret;
	struct io_uring ring;

	(void)argc;	/* no command-line arguments used */
	(void)argv;

	ret = io_uring_queue_init(16, &ring, 0);
	if (ret) {
		fprintf(stderr, "ring setup failed: %d\n", ret);
		return 1;
	}

	test_cq_overflow(&ring);
	io_uring_queue_exit(&ring);	/* was leaked before */
	return 0;
}

Though with your patches applied, we still cannot peek the last cqe.
This test program only issues 33 sqes, so it won't get an EBUSY error.

How about we make this even simpler, then - make the
IORING_SQ_CQ_OVERFLOW actually track the state, rather than when we fail
on submission. The liburing change would be the same, the kernel side
would then look like the below.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4c9a494c9f9f..01981926cdf4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
   	if (cqe) {
   		clear_bit(0, &ctx->sq_check_overflow);
   		clear_bit(0, &ctx->cq_check_overflow);
+		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
   	}
   	spin_unlock_irqrestore(&ctx->completion_lock, flags);
   	io_cqring_ev_posted(ctx);
@@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
   		if (list_empty(&ctx->cq_overflow_list)) {
   			set_bit(0, &ctx->sq_check_overflow);
   			set_bit(0, &ctx->cq_check_overflow);
+			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
Some callers to __io_cqring_fill_event() don't hold completion_lock, for example:
==> io_iopoll_complete
====> __io_cqring_fill_event()
So this patch maybe still not safe when SQPOLL is enabled.
Do you prefer adding a new lock, or taking completion_lock here only when the cq ring has overflowed?

Regards,
Xiaoguang Wang

   		}
   		req->flags |= REQ_F_OVERFLOW;
   		refcount_inc(&req->refs);
@@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data)
   			}
/* Tell userspace we may need a wakeup call */
+			spin_lock_irq(&ctx->completion_lock);
   			ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
-			/* make sure to read SQ tail after writing flags */
-			smp_mb();
+			spin_unlock_irq(&ctx->completion_lock);
to_submit = io_sqring_entries(ctx);
   			if (!to_submit || ret == -EBUSY) {
@@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data)
   			}
   			finish_wait(&ctx->sqo_wait, &wait);
+ spin_lock_irq(&ctx->completion_lock);
   			ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+			spin_unlock_irq(&ctx->completion_lock);
   		}
mutex_lock(&ctx->uring_lock);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8d033961cb78..91953b543125 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -198,6 +198,7 @@ struct io_sqring_offsets {
    * sq_ring->flags
    */
   #define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW	(1U << 1) /* app needs to enter kernel */
struct io_cqring_offsets {
   	__u32 head;

Looks good, I think it'll work now.
I'll test and send patches soon, thanks.

One bit-clear was missing, and I corrected that last comment. Please just base
your patches on this one, thanks!


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4c9a494c9f9f..0b6a4b2d7e76 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
  	if (cqe) {
  		clear_bit(0, &ctx->sq_check_overflow);
  		clear_bit(0, &ctx->cq_check_overflow);
+		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
  	}
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
  	io_cqring_ev_posted(ctx);
@@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
  		if (list_empty(&ctx->cq_overflow_list)) {
  			set_bit(0, &ctx->sq_check_overflow);
  			set_bit(0, &ctx->cq_check_overflow);
+			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
  		}
  		req->flags |= REQ_F_OVERFLOW;
  		refcount_inc(&req->refs);
@@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data)
  			}
/* Tell userspace we may need a wakeup call */
+			spin_lock_irq(&ctx->completion_lock);
  			ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
-			/* make sure to read SQ tail after writing flags */
-			smp_mb();
+			spin_unlock_irq(&ctx->completion_lock);
to_submit = io_sqring_entries(ctx);
  			if (!to_submit || ret == -EBUSY) {
@@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data)
  			}
  			finish_wait(&ctx->sqo_wait, &wait);
+ spin_lock_irq(&ctx->completion_lock);
  			ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+			spin_unlock_irq(&ctx->completion_lock);
  		}
mutex_lock(&ctx->uring_lock);
@@ -7810,6 +7814,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
  			if (list_empty(&ctx->cq_overflow_list)) {
  				clear_bit(0, &ctx->sq_check_overflow);
  				clear_bit(0, &ctx->cq_check_overflow);
+				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
  			}
  			spin_unlock_irq(&ctx->completion_lock);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8d033961cb78..d65fde732518 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -198,6 +198,7 @@ struct io_sqring_offsets {
   * sq_ring->flags
   */
  #define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW	(1U << 1) /* CQ ring is overflown */
struct io_cqring_offsets {
  	__u32 head;




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux