Re: [BUG] io_uring: defer logic based on shared data

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 25/10/2019 19:03, Jens Axboe wrote:
> On 10/25/19 3:55 AM, Pavel Begunkov wrote:
>> I found 2 problems with __io_sequence_defer().
>>
>> 1. it uses @sq_dropped, but doesn't consider @cq_overflow
>> 2. @sq_dropped and @cq_overflow are write-shared with userspace, so
>> it can be maliciously changed.
>>
>> see sent liburing test (test/defer *_hung()), which left an unkillable
>> process for me
> 
> OK, how about the below. I'll split this in two, as it's really two
> separate fixes.
cached_sq_dropped is good, but I was concerned about cached_cq_overflow.
io_cqring_fill_event() can be called asynchronously, so shouldn't we do
some synchronisation there?

> 
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5d10984381cf..5d9d960c1c17 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -191,6 +191,7 @@ struct io_ring_ctx {
>  		unsigned		sq_entries;
>  		unsigned		sq_mask;
>  		unsigned		sq_thread_idle;
> +		unsigned		cached_sq_dropped;
>  		struct io_uring_sqe	*sq_sqes;
>  
>  		struct list_head	defer_list;
> @@ -208,6 +209,7 @@ struct io_ring_ctx {
>  
>  	struct {
>  		unsigned		cached_cq_tail;
> +		unsigned		cached_cq_overflow;
>  		unsigned		cq_entries;
>  		unsigned		cq_mask;
>  		struct wait_queue_head	cq_wait;
> @@ -419,7 +421,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>  static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
>  				       struct io_kiocb *req)
>  {
> -	return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
> +	return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
> +					+ ctx->cached_cq_overflow;
>  }
>  
>  static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
> @@ -590,9 +593,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
>  		WRITE_ONCE(cqe->res, res);
>  		WRITE_ONCE(cqe->flags, 0);
>  	} else {
> -		unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
> -
> -		WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
> +		ctx->cached_cq_overflow++;
> +		WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
>  	}
>  }
>  
> @@ -2601,7 +2603,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
>  
>  	/* drop invalid entries */
>  	ctx->cached_sq_head++;
> -	rings->sq_dropped++;
> +	ctx->cached_sq_dropped++;
> +	WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
>  	return false;
>  }
>  
> @@ -2685,6 +2688,7 @@ static int io_sq_thread(void *data)
>  
>  	timeout = inflight = 0;
>  	while (!kthread_should_park()) {
> +		unsigned prev_cq, cur_cq;
>  		bool mm_fault = false;
>  		unsigned int to_submit;
>  
> @@ -2767,8 +2771,12 @@ static int io_sq_thread(void *data)
>  		}
>  
>  		to_submit = min(to_submit, ctx->sq_entries);
> +		prev_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
>  		inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
>  					   mm_fault);
> +		cur_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
> +		if ((ctx->flags & IORING_SETUP_IOPOLL) && (prev_cq != cur_cq))
> +			inflight -= cur_cq - prev_cq;
>  
>  		/* Commit SQ ring head once we've consumed all SQEs */
>  		io_commit_sqring(ctx);
> 

-- 
Yours sincerely,
Pavel Begunkov

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux