Re: [PATCH v2] io_uring: add timeout support for io_uring_enter()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Jens,

Could you please review this patch?

Thanks,
Jiufei

On 2020/8/4 下午5:28, Jiufei Xue wrote:
> Now users who want to get woken when waiting for events should submit a
> timeout command first. It is not safe for applications that split SQ and
> CQ handling between two threads, such as mysql. Users should synchronize
> the two threads explicitly to protect SQ and that will impact the
> performance.
> 
> This patch adds support for timeout to existing io_uring_enter(). To
> avoid overloading arguments, it introduces a new parameter structure
> which contains sigmask and timeout.
> 
> I have tested the workloads with one thread submitting nop requests
> while the other reaping the cqe with timeout. It shows 1.8~2x faster
> when the iodepth is 16.
> 
> Signed-off-by: Jiufei Xue <jiufei.xue@xxxxxxxxxxxxxxxxx>
> ---
>  fs/io_uring.c                 | 45 +++++++++++++++++++++++++++++++++++++------
>  include/uapi/linux/io_uring.h |  7 +++++++
>  2 files changed, 46 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 2a3af95..cdd89e4 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -6514,7 +6514,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
>   * application must reap them itself, as they reside on the shared cq ring.
>   */
>  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
> -			  const sigset_t __user *sig, size_t sigsz)
> +			  const sigset_t __user *sig, size_t sigsz,
> +			  struct __kernel_timespec __user *uts)
>  {
>  	struct io_wait_queue iowq = {
>  		.wq = {
> @@ -6526,6 +6527,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  		.to_wait	= min_events,
>  	};
>  	struct io_rings *rings = ctx->rings;
> +	struct timespec64 ts;
> +	signed long timeout = 0;
>  	int ret = 0;
>  
>  	do {
> @@ -6548,6 +6551,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  			return ret;
>  	}
>  
> +	if (uts) {
> +		if (get_timespec64(&ts, uts))
> +			return -EFAULT;
> +		timeout = timespec64_to_jiffies(&ts);
> +	}
> +
>  	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
>  	trace_io_uring_cqring_wait(ctx, min_events);
>  	do {
> @@ -6569,7 +6578,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  		}
>  		if (io_should_wake(&iowq, false))
>  			break;
> -		schedule();
> +		if (uts) {
> +			if ((timeout = schedule_timeout(timeout)) == 0) {
> +				ret = -ETIME;
> +				break;
> +			}
> +		} else {
> +			schedule();
> +		}
>  	} while (1);
>  	finish_wait(&ctx->wait, &iowq.wq);
>  
> @@ -7993,19 +8009,36 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>  #endif /* !CONFIG_MMU */
>  
>  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
> -		u32, min_complete, u32, flags, const sigset_t __user *, sig,
> +		u32, min_complete, u32, flags, const void __user *, argp,
>  		size_t, sigsz)
>  {
>  	struct io_ring_ctx *ctx;
>  	long ret = -EBADF;
>  	int submitted = 0;
>  	struct fd f;
> +	const sigset_t __user *sig;
> +	struct __kernel_timespec __user *ts;
> +	struct io_uring_getevents_arg arg;
>  
>  	io_run_task_work();
>  
> -	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
> +	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
> +		      IORING_ENTER_GETEVENTS_TIMEOUT))
>  		return -EINVAL;
>  
> +	/* deal with IORING_ENTER_GETEVENTS_TIMEOUT */
> +	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
> +		if (!(flags & IORING_ENTER_GETEVENTS))
> +			return -EINVAL;
> +		if (copy_from_user(&arg, argp, sizeof(arg)))
> +			return -EFAULT;
> +		sig = arg.sigmask;
> +		ts = arg.ts;
> +	} else {
> +		sig = (const sigset_t __user *)argp;
> +		ts = NULL;
> +	}
> +
>  	f = fdget(fd);
>  	if (!f.file)
>  		return -EBADF;
> @@ -8052,7 +8085,7 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>  		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
>  			ret = io_iopoll_check(ctx, min_complete);
>  		} else {
> -			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
> +			ret = io_cqring_wait(ctx, min_complete, sig, sigsz, ts);
>  		}
>  	}
>  
> @@ -8346,7 +8379,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
>  	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
>  			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
>  			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
> -			IORING_FEAT_POLL_32BITS;
> +			IORING_FEAT_POLL_32BITS | IORING_FEAT_GETEVENTS_TIMEOUT;
>  
>  	if (copy_to_user(params, p, sizeof(*p))) {
>  		ret = -EFAULT;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index d65fde7..70764d2 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -224,6 +224,7 @@ struct io_cqring_offsets {
>   */
>  #define IORING_ENTER_GETEVENTS	(1U << 0)
>  #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
> +#define IORING_ENTER_GETEVENTS_TIMEOUT	(1U << 2)
>  
>  /*
>   * Passed in for io_uring_setup(2). Copied back with updated info on success
> @@ -251,6 +252,7 @@ struct io_uring_params {
>  #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
>  #define IORING_FEAT_FAST_POLL		(1U << 5)
>  #define IORING_FEAT_POLL_32BITS 	(1U << 6)
> +#define IORING_FEAT_GETEVENTS_TIMEOUT	(1U << 7)
>  
>  /*
>   * io_uring_register(2) opcodes and arguments
> @@ -290,4 +292,9 @@ struct io_uring_probe {
>  	struct io_uring_probe_op ops[0];
>  };
>  
> +struct io_uring_getevents_arg {
> +	sigset_t *sigmask;
> +	struct __kernel_timespec *ts;
> +};
> +
>  #endif
> 



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux