Re: [PATCH 3/5] io_uring: implement our own schedule timeout handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2024-08-19 16:28, Jens Axboe wrote:
> In preparation for having two distinct timeouts and avoid waking the
> task if we don't need to.
> 
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  io_uring/io_uring.c | 41 ++++++++++++++++++++++++++++++++++++-----
>  io_uring/io_uring.h |  2 ++
>  2 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 9e2b8d4c05db..ddfbe04c61ed 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -2322,7 +2322,7 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
>  	 * Cannot safely flush overflowed CQEs from here, ensure we wake up
>  	 * the task, and the next invocation will do it.
>  	 */
> -	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
> +	if (io_should_wake(iowq) || io_has_work(iowq->ctx) || iowq->hit_timeout)

iowq->hit_timeout may be modified in a timer softirq context, while this
wait_queue_func_t (AIUI) may get called from any context, e.g.
net_rx_softirq for sockets. Does this read need a READ_ONCE() to pair
with the WRITE_ONCE() in io_cqring_timer_wakeup()?

>  		return autoremove_wake_function(curr, mode, wake_flags, key);
>  	return -1;
>  }
> @@ -2350,6 +2350,38 @@ static bool current_pending_io(void)
>  	return percpu_counter_read_positive(&tctx->inflight);
>  }
>  
> +static enum hrtimer_restart io_cqring_timer_wakeup(struct hrtimer *timer)
> +{
> +	struct io_wait_queue *iowq = container_of(timer, struct io_wait_queue, t);
> +	struct io_ring_ctx *ctx = iowq->ctx;
> +
> +	WRITE_ONCE(iowq->hit_timeout, 1);
> +	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
> +		wake_up_process(ctx->submitter_task);
> +	else
> +		io_cqring_wake(ctx);

This is a bit different to schedule_hrtimeout_range_clock(). Why is
io_cqring_wake() needed here for non-DEFER_TASKRUN?

> +	return HRTIMER_NORESTART;
> +}
> +
> +static int io_cqring_schedule_timeout(struct io_wait_queue *iowq,
> +				      clockid_t clock_id)
> +{
> +	iowq->hit_timeout = 0;
> +	hrtimer_init_on_stack(&iowq->t, clock_id, HRTIMER_MODE_ABS);
> +	iowq->t.function = io_cqring_timer_wakeup;
> +	hrtimer_set_expires_range_ns(&iowq->t, iowq->timeout, 0);
> +	hrtimer_start_expires(&iowq->t, HRTIMER_MODE_ABS);
> +
> +	if (!READ_ONCE(iowq->hit_timeout))
> +		schedule();
> +
> +	hrtimer_cancel(&iowq->t);
> +	destroy_hrtimer_on_stack(&iowq->t);
> +	__set_current_state(TASK_RUNNING);
> +
> +	return READ_ONCE(iowq->hit_timeout) ? -ETIME : 0;
> +}
> +
>  static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
>  				     struct io_wait_queue *iowq)
>  {
> @@ -2362,11 +2394,10 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
>  	 */
>  	if (current_pending_io())
>  		current->in_iowait = 1;
> -	if (iowq->timeout == KTIME_MAX)
> +	if (iowq->timeout != KTIME_MAX)
> +		ret = io_cqring_schedule_timeout(iowq, ctx->clockid);
> +	else
>  		schedule();
> -	else if (!schedule_hrtimeout_range_clock(&iowq->timeout, 0,
> -						 HRTIMER_MODE_ABS, ctx->clockid))
> -		ret = -ETIME;
>  	current->in_iowait = 0;
>  	return ret;
>  }
> diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
> index 9935819f12b7..f95c1b080f4b 100644
> --- a/io_uring/io_uring.h
> +++ b/io_uring/io_uring.h
> @@ -40,7 +40,9 @@ struct io_wait_queue {
>  	struct io_ring_ctx *ctx;
>  	unsigned cq_tail;
>  	unsigned nr_timeouts;
> +	int hit_timeout;
>  	ktime_t timeout;
> +	struct hrtimer t;
>  
>  #ifdef CONFIG_NET_RX_BUSY_POLL
>  	ktime_t napi_busy_poll_dt;




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux