Re: [PATCH v2] io_uring: add timeout support for io_uring_enter()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 24/08/2020 02:49, Jiufei Xue wrote:
> ping...
> 
> On 2020/8/4 下午5:28, Jiufei Xue wrote:
>> Currently, users who want to be woken up by a timeout while waiting for
>> events must first submit a timeout command. This is not safe for
>> applications that split SQ and CQ handling between two threads, such as
>> MySQL: the two threads must be synchronized explicitly to protect the SQ,
>> and that hurts performance.
>>
>> This patch adds timeout support to the existing io_uring_enter(). To
>> avoid overloading the arguments, it introduces a new parameter structure
>> that contains the sigmask and the timeout.
>>
>> I have tested a workload with one thread submitting nop requests
>> while the other reaps the CQEs with a timeout. The result is 1.8~2x
>> faster when the iodepth is 16.

What happened with this? I thought there were enough people wanting
such a thing.

>>
>> Signed-off-by: Jiufei Xue <jiufei.xue@xxxxxxxxxxxxxxxxx>
>> ---
>>  fs/io_uring.c                 | 45 +++++++++++++++++++++++++++++++++++++------
>>  include/uapi/linux/io_uring.h |  7 +++++++
>>  2 files changed, 46 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 2a3af95..cdd89e4 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -6514,7 +6514,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
>>   * application must reap them itself, as they reside on the shared cq ring.
>>   */
>>  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>> -			  const sigset_t __user *sig, size_t sigsz)
>> +			  const sigset_t __user *sig, size_t sigsz,
>> +			  struct __kernel_timespec __user *uts)
>>  {
>>  	struct io_wait_queue iowq = {
>>  		.wq = {
>> @@ -6526,6 +6527,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>>  		.to_wait	= min_events,
>>  	};
>>  	struct io_rings *rings = ctx->rings;
>> +	struct timespec64 ts;
>> +	signed long timeout = 0;
>>  	int ret = 0;
>>  
>>  	do {
>> @@ -6548,6 +6551,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>>  			return ret;
>>  	}
>>  
>> +	if (uts) {
>> +		if (get_timespec64(&ts, uts))
>> +			return -EFAULT;
>> +		timeout = timespec64_to_jiffies(&ts);
>> +	}
>> +
>>  	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
>>  	trace_io_uring_cqring_wait(ctx, min_events);
>>  	do {
>> @@ -6569,7 +6578,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>>  		}
>>  		if (io_should_wake(&iowq, false))
>>  			break;
>> -		schedule();
>> +		if (uts) {
>> +			if ((timeout = schedule_timeout(timeout)) == 0) {
>> +				ret = -ETIME;
>> +				break;
>> +			}
>> +		} else {
>> +			schedule();
>> +		}
>>  	} while (1);
>>  	finish_wait(&ctx->wait, &iowq.wq);
>>  
>> @@ -7993,19 +8009,36 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>>  #endif /* !CONFIG_MMU */
>>  
>>  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
>> -		u32, min_complete, u32, flags, const sigset_t __user *, sig,
>> +		u32, min_complete, u32, flags, const void __user *, argp,
>>  		size_t, sigsz)
>>  {
>>  	struct io_ring_ctx *ctx;
>>  	long ret = -EBADF;
>>  	int submitted = 0;
>>  	struct fd f;
>> +	const sigset_t __user *sig;
>> +	struct __kernel_timespec __user *ts;
>> +	struct io_uring_getevents_arg arg;
>>  
>>  	io_run_task_work();
>>  
>> -	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
>> +	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
>> +		      IORING_ENTER_GETEVENTS_TIMEOUT))
>>  		return -EINVAL;
>>  
>> +	/* deal with IORING_ENTER_GETEVENTS_TIMEOUT */
>> +	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
>> +		if (!(flags & IORING_ENTER_GETEVENTS))
>> +			return -EINVAL;
>> +		if (copy_from_user(&arg, argp, sizeof(arg)))
>> +			return -EFAULT;
>> +		sig = arg.sigmask;
>> +		ts = arg.ts;
>> +	} else {
>> +		sig = (const sigset_t __user *)argp;
>> +		ts = NULL;
>> +	}
>> +
>>  	f = fdget(fd);
>>  	if (!f.file)
>>  		return -EBADF;
>> @@ -8052,7 +8085,7 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>>  		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
>>  			ret = io_iopoll_check(ctx, min_complete);
>>  		} else {
>> -			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
>> +			ret = io_cqring_wait(ctx, min_complete, sig, sigsz, ts);
>>  		}
>>  	}
>>  
>> @@ -8346,7 +8379,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
>>  	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
>>  			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
>>  			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
>> -			IORING_FEAT_POLL_32BITS;
>> +			IORING_FEAT_POLL_32BITS | IORING_FEAT_GETEVENTS_TIMEOUT;
>>  
>>  	if (copy_to_user(params, p, sizeof(*p))) {
>>  		ret = -EFAULT;
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index d65fde7..70764d2 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -224,6 +224,7 @@ struct io_cqring_offsets {
>>   */
>>  #define IORING_ENTER_GETEVENTS	(1U << 0)
>>  #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
>> +#define IORING_ENTER_GETEVENTS_TIMEOUT	(1U << 2)
>>  
>>  /*
>>   * Passed in for io_uring_setup(2). Copied back with updated info on success
>> @@ -251,6 +252,7 @@ struct io_uring_params {
>>  #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
>>  #define IORING_FEAT_FAST_POLL		(1U << 5)
>>  #define IORING_FEAT_POLL_32BITS 	(1U << 6)
>> +#define IORING_FEAT_GETEVENTS_TIMEOUT	(1U << 7)
>>  
>>  /*
>>   * io_uring_register(2) opcodes and arguments
>> @@ -290,4 +292,9 @@ struct io_uring_probe {
>>  	struct io_uring_probe_op ops[0];
>>  };
>>  
>> +struct io_uring_getevents_arg {
>> +	sigset_t *sigmask;
>> +	struct __kernel_timespec *ts;
>> +};
>> +
>>  #endif
>>

-- 
Pavel Begunkov



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux