Jens Axboe <axboe@xxxxxxxxx> writes:

> It can be quite expensive for the fast paths to dereference
> req->task->io_uring->in_cancel for the (very) unlikely scenario that
> we're currently undergoing cancelations.
>
> Add a ctx bit to indicate if we're currently canceling or not, so that
> the hot path may check this rather than dip into the remote task
> state.
>
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  include/linux/io_uring_types.h |  2 ++
>  io_uring/io_uring.c            | 44 ++++++++++++++++++++++++++++++++--
>  2 files changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index 00689c12f6ab..42d704adb9c6 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -211,6 +211,8 @@ struct io_ring_ctx {
>  		enum task_work_notify_mode	notify_method;
>  		struct io_rings			*rings;
>  		struct task_struct		*submitter_task;
> +		/* local ctx cache of task cancel state */
> +		unsigned long			in_cancel;

Minor nit: even though the real tctx value is ulong, the cache could just
be a bitfield alongside the many others in this structure. You only care
if it is >0 or 0 when peeking the cache.

Either way,

Reviewed-by: Gabriel Krisman Bertazi <krisman@xxxxxxx>

>  		struct percpu_ref		refs;
>  	} ____cacheline_aligned_in_smp;
>
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 64e07df034d1..0fcb532db1fc 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -3192,6 +3192,46 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
>  	return percpu_counter_sum(&tctx->inflight);
>  }
>
> +static __cold void io_uring_dec_cancel(struct io_uring_task *tctx,
> +				       struct io_sq_data *sqd)
> +{
> +	if (!atomic_dec_return(&tctx->in_cancel))
> +		return;
> +
> +	if (!sqd) {
> +		struct io_tctx_node *node;
> +		unsigned long index;
> +
> +		xa_for_each(&tctx->xa, index, node)
> +			clear_bit(0, &node->ctx->in_cancel);
> +	} else {
> +		struct io_ring_ctx *ctx;
> +
> +		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
> +			clear_bit(0, &ctx->in_cancel);
> +	}
> +}
> +
> +static __cold void io_uring_inc_cancel(struct io_uring_task *tctx,
> +				       struct io_sq_data *sqd)
> +{
> +	if (atomic_inc_return(&tctx->in_cancel) != 1)
> +		return;
> +
> +	if (!sqd) {
> +		struct io_tctx_node *node;
> +		unsigned long index;
> +
> +		xa_for_each(&tctx->xa, index, node)
> +			set_bit(0, &node->ctx->in_cancel);
> +	} else {
> +		struct io_ring_ctx *ctx;
> +
> +		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
> +			set_bit(0, &ctx->in_cancel);
> +	}
> +}
> +
>  /*
>   * Find any io_uring ctx that this task has registered or done IO on, and cancel
>   * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
> @@ -3210,7 +3250,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  	if (tctx->io_wq)
>  		io_wq_exit_start(tctx->io_wq);
>
> -	atomic_inc(&tctx->in_cancel);
> +	io_uring_inc_cancel(tctx, sqd);
>  	do {
>  		bool loop = false;
>
> @@ -3263,7 +3303,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  		 * We shouldn't run task_works after cancel, so just leave
>  		 * ->in_cancel set for normal exit.
>  		 */
> -		atomic_dec(&tctx->in_cancel);
> +		io_uring_dec_cancel(tctx, sqd);
>  		/* for exec all current's requests should be gone, kill tctx */
>  		__io_uring_free(current);
>  	}

-- 
Gabriel Krisman Bertazi
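
For illustration, the bitfield variant suggested in the nit above might look
roughly like the sketch below. This is a hypothetical rework, not code from the
posted patch: the field name and placement are assumptions, and since C
bitfield updates are non-atomic read-modify-writes of the containing word, the
set/clear sites could no longer rely on set_bit()/clear_bit() and would have to
avoid racing with writers of neighboring bitfields.

/*
 * Sketch only, assuming the cache can sit next to the other bitfields in
 * struct io_ring_ctx; the hot path only checks zero vs non-zero.
 */
struct io_ring_ctx {
	/* ... other members elided ... */

	/* local ctx cache of task cancel state */
	unsigned int		in_cancel: 1;
};

/*
 * io_uring_inc_cancel()/io_uring_dec_cancel() would then use plain stores
 * instead of set_bit()/clear_bit(), e.g.:
 *
 *	xa_for_each(&tctx->xa, index, node)
 *		node->ctx->in_cancel = 1;
 */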