Re: [PATCH] io_uring: export cq overflow status to userspace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 7/8/20 9:39 AM, Xiaoguang Wang wrote:
> hi,
> 
>> On 7/7/20 11:29 PM, Xiaoguang Wang wrote:
>>> I modify above test program a bit:
>>> #include <errno.h>
>>> #include <stdio.h>
>>> #include <unistd.h>
>>> #include <stdlib.h>
>>> #include <string.h>
>>> #include <fcntl.h>
>>> #include <assert.h>
>>>
>>> #include "liburing.h"
>>>
>>> static void test_cq_overflow(struct io_uring *ring)
>>> {
>>>           struct io_uring_cqe *cqe;
>>>           struct io_uring_sqe *sqe;
>>>           int issued = 0;
>>>           int ret = 0;
>>>           int i;
>>>
>>>           for (i = 0; i < 33; i++) {
>>>                   sqe = io_uring_get_sqe(ring);
>>>                   if (!sqe) {
>>>                           fprintf(stderr, "get sqe failed\n");
>>>                           break;;
>>>                   }
>>>                   ret = io_uring_submit(ring);
>>>                   if (ret <= 0) {
>>>                           if (ret != -EBUSY)
>>>                                   fprintf(stderr, "sqe submit failed: %d\n", ret);
>>>                           break;
>>>                   }
>>>                   issued++;
>>>           }
>>>
>>>           printf("issued requests: %d\n", issued);
>>>
>>>           while (issued) {
>>>                   ret = io_uring_peek_cqe(ring, &cqe);
>>>                   if (ret) {
>>>                           if (ret != -EAGAIN) {
>>>                                   fprintf(stderr, "peek completion failed: %s\n",
>>>                                           strerror(ret));
>>>                                   break;
>>>                           }
>>>                           printf("left requets: %d %d\n", issued, IO_URING_READ_ONCE(*ring->sq.kflags));
>>>                           continue;
>>>                   }
>>>                   io_uring_cqe_seen(ring, cqe);
>>>                   issued--;
>>>                   printf("left requets: %d\n", issued);
>>>           }
>>> }
>>>
>>> int main(int argc, char *argv[])
>>> {
>>>           int ret;
>>>           struct io_uring ring;
>>>
>>>           ret = io_uring_queue_init(16, &ring, 0);
>>>           if (ret) {
>>>                   fprintf(stderr, "ring setup failed: %d\n", ret);
>>>                   return 1;
>>>           }
>>>
>>>           test_cq_overflow(&ring);
>>>           return 0;
>>> }
>>>
>>> Though with your patches applied, we still can not peek the last cqe.
>>> This test program will only issue 33 sqes, so it won't get EBUSY error.
>>
>> How about we make this even simpler, then - make the
>> IORING_SQ_CQ_OVERFLOW actually track the state, rather than when we fail
>> on submission. The liburing change would be the same, the kernel side
>> would then look like the below.
>>
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 4c9a494c9f9f..01981926cdf4 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
>>   	if (cqe) {
>>   		clear_bit(0, &ctx->sq_check_overflow);
>>   		clear_bit(0, &ctx->cq_check_overflow);
>> +		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
>>   	}
>>   	spin_unlock_irqrestore(&ctx->completion_lock, flags);
>>   	io_cqring_ev_posted(ctx);
>> @@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
>>   		if (list_empty(&ctx->cq_overflow_list)) {
>>   			set_bit(0, &ctx->sq_check_overflow);
>>   			set_bit(0, &ctx->cq_check_overflow);
>> +			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
>>   		}
>>   		req->flags |= REQ_F_OVERFLOW;
>>   		refcount_inc(&req->refs);
>> @@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data)
>>   			}
>>   
>>   			/* Tell userspace we may need a wakeup call */
>> +			spin_lock_irq(&ctx->completion_lock);
>>   			ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
>> -			/* make sure to read SQ tail after writing flags */
>> -			smp_mb();
>> +			spin_unlock_irq(&ctx->completion_lock);
>>   
>>   			to_submit = io_sqring_entries(ctx);
>>   			if (!to_submit || ret == -EBUSY) {
>> @@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data)
>>   			}
>>   			finish_wait(&ctx->sqo_wait, &wait);
>>   
>> +			spin_lock_irq(&ctx->completion_lock);
>>   			ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
>> +			spin_unlock_irq(&ctx->completion_lock);
>>   		}
>>   
>>   		mutex_lock(&ctx->uring_lock);
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 8d033961cb78..91953b543125 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -198,6 +198,7 @@ struct io_sqring_offsets {
>>    * sq_ring->flags
>>    */
>>   #define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
>> +#define IORING_SQ_CQ_OVERFLOW	(1U << 1) /* app needs to enter kernel */
>>   
>>   struct io_cqring_offsets {
>>   	__u32 head;
>>
> Looks good, I think it'll work now.
> I'll test and send patches soon, thanks.

One missing bit clear, and I corrected that last comment. Just base on
this one, thanks!


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4c9a494c9f9f..0b6a4b2d7e76 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1342,6 +1342,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	if (cqe) {
 		clear_bit(0, &ctx->sq_check_overflow);
 		clear_bit(0, &ctx->cq_check_overflow);
+		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
 	}
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	io_cqring_ev_posted(ctx);
@@ -1379,6 +1380,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
 		if (list_empty(&ctx->cq_overflow_list)) {
 			set_bit(0, &ctx->sq_check_overflow);
 			set_bit(0, &ctx->cq_check_overflow);
+			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
 		}
 		req->flags |= REQ_F_OVERFLOW;
 		refcount_inc(&req->refs);
@@ -6375,9 +6377,9 @@ static int io_sq_thread(void *data)
 			}
 
 			/* Tell userspace we may need a wakeup call */
+			spin_lock_irq(&ctx->completion_lock);
 			ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
-			/* make sure to read SQ tail after writing flags */
-			smp_mb();
+			spin_unlock_irq(&ctx->completion_lock);
 
 			to_submit = io_sqring_entries(ctx);
 			if (!to_submit || ret == -EBUSY) {
@@ -6400,7 +6402,9 @@ static int io_sq_thread(void *data)
 			}
 			finish_wait(&ctx->sqo_wait, &wait);
 
+			spin_lock_irq(&ctx->completion_lock);
 			ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+			spin_unlock_irq(&ctx->completion_lock);
 		}
 
 		mutex_lock(&ctx->uring_lock);
@@ -7810,6 +7814,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 			if (list_empty(&ctx->cq_overflow_list)) {
 				clear_bit(0, &ctx->sq_check_overflow);
 				clear_bit(0, &ctx->cq_check_overflow);
+				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
 			}
 			spin_unlock_irq(&ctx->completion_lock);
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8d033961cb78..d65fde732518 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -198,6 +198,7 @@ struct io_sqring_offsets {
  * sq_ring->flags
  */
 #define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW	(1U << 1) /* CQ ring is overflown */
 
 struct io_cqring_offsets {
 	__u32 head;

-- 
Jens Axboe




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux