On 10/25/19 3:55 AM, Pavel Begunkov wrote:
> I found 2 problems with __io_sequence_defer().
>
> 1. it uses @sq_dropped, but doesn't consider @cq_overflow
> 2. @sq_dropped and @cq_overflow are write-shared with userspace, so
>    they can be maliciously changed.
>
> see the liburing test I sent (test/defer *_hung()), which left an
> unkillable process for me

OK, how about the below. I'll split this in two, as it's really two
separate fixes.

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5d10984381cf..5d9d960c1c17 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -191,6 +191,7 @@ struct io_ring_ctx {
 		unsigned		sq_entries;
 		unsigned		sq_mask;
 		unsigned		sq_thread_idle;
+		unsigned		cached_sq_dropped;
 		struct io_uring_sqe	*sq_sqes;
 
 		struct list_head	defer_list;
@@ -208,6 +209,7 @@ struct io_ring_ctx {
 
 	struct {
 		unsigned		cached_cq_tail;
+		unsigned		cached_cq_overflow;
 		unsigned		cq_entries;
 		unsigned		cq_mask;
 		struct wait_queue_head	cq_wait;
@@ -419,7 +421,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
 				       struct io_kiocb *req)
 {
-	return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
+	return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
+			+ ctx->cached_cq_overflow;
 }
 
 static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
@@ -590,9 +593,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 		WRITE_ONCE(cqe->res, res);
 		WRITE_ONCE(cqe->flags, 0);
 	} else {
-		unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
-
-		WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
+		ctx->cached_cq_overflow++;
+		WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
 	}
 }
 
@@ -2601,7 +2603,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 
 	/* drop invalid entries */
 	ctx->cached_sq_head++;
-	rings->sq_dropped++;
+	ctx->cached_sq_dropped++;
+	WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
 	return false;
 }
 
@@ -2685,6 +2688,7 @@ static int io_sq_thread(void *data)
 
 	timeout = inflight = 0;
 	while (!kthread_should_park()) {
+		unsigned prev_cq, cur_cq;
 		bool mm_fault = false;
 		unsigned int to_submit;
 
@@ -2767,8 +2771,12 @@ static int io_sq_thread(void *data)
 		}
 
 		to_submit = min(to_submit, ctx->sq_entries);
+		prev_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
 		inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
 					   mm_fault);
+		cur_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
+		if ((ctx->flags & IORING_SETUP_IOPOLL) && (prev_cq != cur_cq))
+			inflight -= cur_cq - prev_cq;
 
 		/* Commit SQ ring head once we've consumed all SQEs */
 		io_commit_sqring(ctx);

-- 
Jens Axboe
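
For illustration, here is a toy userspace model of the two problems Pavel
describes -- this is not the kernel code; the struct layout and the
defer_old()/defer_new() helpers are made up for the example. The old check
ignores cq_overflow entirely and trusts sq_dropped, which lives in the ring
page userspace can write; the fixed check consults only kernel-private
cached counters.

	/*
	 * Toy userspace model of the deferral check -- not kernel code.
	 * "shared_rings" stands in for the ring page that is mapped into
	 * (and writable by) userspace; ctx mirrors the kernel-private
	 * cached counters added by the patch.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	struct shared_rings {		/* writable by userspace */
		unsigned sq_dropped;
		unsigned cq_overflow;
	};

	struct ctx {			/* kernel-private */
		unsigned cached_cq_tail;
		unsigned cached_sq_dropped;
		unsigned cached_cq_overflow;
		struct shared_rings *rings;
	};

	/* Old check: ignores cq_overflow and trusts the shared page, so a
	 * hostile store to sq_dropped keeps this returning true forever. */
	static bool defer_old(struct ctx *c, unsigned req_seq)
	{
		return req_seq != c->cached_cq_tail + c->rings->sq_dropped;
	}

	/* Fixed check: only kernel-owned cached counters participate. */
	static bool defer_new(struct ctx *c, unsigned req_seq)
	{
		return req_seq != c->cached_cq_tail + c->cached_sq_dropped +
				  c->cached_cq_overflow;
	}

	int main(void)
	{
		struct shared_rings rings = { 0, 0 };
		struct ctx c = { .cached_cq_tail = 4, .cached_sq_dropped = 0,
				 .cached_cq_overflow = 1, .rings = &rings };
		unsigned req_seq = 5;		/* issued after 5 prior events */

		rings.sq_dropped = 1000;	/* hostile userspace write */

		printf("old: %s\n", defer_old(&c, req_seq) ? "defer" : "run");
		printf("new: %s\n", defer_new(&c, req_seq) ? "defer" : "run");
		return 0;
	}

This prints "old: defer" and "new: run". Note the old check mis-fires here
even with an honest sq_dropped of 0: 4 posted completions plus 1 overflowed
event account for all 5 prior events, but cached_cq_tail + sq_dropped only
reaches 4, so the request stays deferred -- exactly the hang the liburing
test triggers.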
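
The io_sq_thread() hunk keeps inflight honest under IORING_SETUP_IOPOLL:
completions that are posted (or overflowed) while io_submit_sqes() runs must
not be counted as still in flight, or the thread would poll for events that
already happened. A small arithmetic sketch, with all values made up for
illustration:

	/* Arithmetic sketch of the IOPOLL inflight correction above;
	 * every value here is invented for illustration. */
	#include <stdio.h>

	int main(void)
	{
		unsigned inflight = 0;
		unsigned cached_cq_tail = 10, cached_cq_overflow = 0;

		/* Snapshot posted + overflowed completions pre-submit. */
		unsigned prev_cq = cached_cq_tail + cached_cq_overflow;

		/* Suppose 4 requests are submitted and 3 complete during
		 * the submit call: 2 posted to the CQ ring, 1 overflowed. */
		inflight += 4;
		cached_cq_tail += 2;
		cached_cq_overflow += 1;

		unsigned cur_cq = cached_cq_tail + cached_cq_overflow;
		if (cur_cq != prev_cq)	/* the IORING_SETUP_IOPOLL case */
			inflight -= cur_cq - prev_cq;

		printf("inflight = %u\n", inflight);	/* 1, not 4 */
		return 0;
	}

Summing cached_cq_tail and cached_cq_overflow on both sides of the submit is
what makes overflowed completions count the same as posted ones, matching
the new __io_sequence_defer() arithmetic.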