On 25/10/2019 19:03, Jens Axboe wrote: > On 10/25/19 3:55 AM, Pavel Begunkov wrote: >> I found 2 problems with __io_sequence_defer(). >> >> 1. it uses @sq_dropped, but doesn't consider @cq_overflow >> 2. @sq_dropped and @cq_overflow are write-shared with userspace, so >> it can be maliciously changed. >> >> see sent liburing test (test/defer *_hung()), which left an unkillable >> process for me > > OK, how about the below. I'll split this in two, as it's really two > separate fixes. cached_sq_dropped is good, but I was concerned about cached_cq_overflow. io_cqring_fill_event() can be called asynchronously, so shouldn't we do some synchronisation then? > > > diff --git a/fs/io_uring.c b/fs/io_uring.c > index 5d10984381cf..5d9d960c1c17 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -191,6 +191,7 @@ struct io_ring_ctx { > unsigned sq_entries; > unsigned sq_mask; > unsigned sq_thread_idle; > + unsigned cached_sq_dropped; > struct io_uring_sqe *sq_sqes; > > struct list_head defer_list; > @@ -208,6 +209,7 @@ struct io_ring_ctx { > > struct { > unsigned cached_cq_tail; > + unsigned cached_cq_overflow; > unsigned cq_entries; > unsigned cq_mask; > struct wait_queue_head cq_wait; > @@ -419,7 +421,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) > static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, > struct io_kiocb *req) > { > - return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped; > + return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped > + + ctx->cached_cq_overflow; > } > > static inline bool io_sequence_defer(struct io_ring_ctx *ctx, > @@ -590,9 +593,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, > WRITE_ONCE(cqe->res, res); > WRITE_ONCE(cqe->flags, 0); > } else { > - unsigned overflow = READ_ONCE(ctx->rings->cq_overflow); > - > - WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1); > + ctx->cached_cq_overflow++; > + WRITE_ONCE(ctx->rings->cq_overflow, 
ctx->cached_cq_overflow); > } > } > > @@ -2601,7 +2603,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) > > /* drop invalid entries */ > ctx->cached_sq_head++; > - rings->sq_dropped++; > + ctx->cached_sq_dropped++; > + WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped); > return false; > } > > @@ -2685,6 +2688,7 @@ static int io_sq_thread(void *data) > > timeout = inflight = 0; > while (!kthread_should_park()) { > + unsigned prev_cq, cur_cq; > bool mm_fault = false; > unsigned int to_submit; > > @@ -2767,8 +2771,12 @@ static int io_sq_thread(void *data) > } > > to_submit = min(to_submit, ctx->sq_entries); > + prev_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow; > inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL, > mm_fault); > + cur_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow; > + if ((ctx->flags & IORING_SETUP_IOPOLL) && (prev_cq != cur_cq)) > + inflight -= cur_cq - prev_cq; > > /* Commit SQ ring head once we've consumed all SQEs */ > io_commit_sqring(ctx); > -- Yours sincerely, Pavel Begunkov
Attachment:
signature.asc
Description: OpenPGP digital signature