On 01/03/2020 19:18, Pavel Begunkov wrote: > Get next request when dropping the submission reference. However, if > there is an asynchronous counterpart (i.e. read/write, timeout, etc), > that would be dangerous to do, so ignore them using new > REQ_F_DONT_STEAL_NEXT flag. > > Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> > --- > fs/io_uring.c | 121 ++++++++++++++++++++++++++------------------------ > 1 file changed, 62 insertions(+), 59 deletions(-) > > diff --git a/fs/io_uring.c b/fs/io_uring.c > index daf7c2095523..d456b0ff6835 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -488,6 +488,7 @@ enum { > REQ_F_NEED_CLEANUP_BIT, > REQ_F_OVERFLOW_BIT, > REQ_F_POLLED_BIT, > + REQ_F_DONT_STEAL_NEXT_BIT, > }; > > enum { > @@ -532,6 +533,8 @@ enum { > REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT), > /* already went through poll handler */ > REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), > + /* don't try to get next req, it's async and racy */ > + REQ_F_DONT_STEAL_NEXT = BIT(REQ_F_DONT_STEAL_NEXT_BIT), > }; > > struct async_poll { > @@ -1218,6 +1221,27 @@ static void io_cqring_add_event(struct io_kiocb *req, long res) > io_cqring_ev_posted(ctx); > } > > +static void io_link_work_cb(struct io_wq_work **workptr) > +{ > + struct io_wq_work *work = *workptr; > + struct io_kiocb *link = work->data; > + > + io_queue_linked_timeout(link); > + io_wq_submit_work(workptr); > +} > + > +static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) > +{ > + struct io_kiocb *link; > + > + *workptr = &nxt->work; > + link = io_prep_linked_timeout(nxt); > + if (link) { > + nxt->work.func = io_link_work_cb; > + nxt->work.data = link; > + } > +} > + > static inline bool io_is_fallback_req(struct io_kiocb *req) > { > return req == (struct io_kiocb *) > @@ -1518,17 +1542,28 @@ static void io_free_req(struct io_kiocb *req) > io_queue_async_work(nxt); > } > > -/* > - * Drop reference to request, return next in chain (if there is one) if this > - * was the last reference to 
this request. > - */ > -__attribute__((nonnull)) > -static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr) > +__attribute__((warn_unused_result)) > +static struct io_kiocb *io_put_req_submission(struct io_kiocb *req) > { > - if (refcount_dec_and_test(&req->refs)) { > - io_req_find_next(req, nxtptr); > + bool last_ref = refcount_dec_and_test(&req->refs); > + struct io_kiocb *nxt = NULL; > + > + if (!(req->flags & REQ_F_DONT_STEAL_NEXT) || last_ref) This is a bit racy: @nxt needs to be grabbed first, and only then the atomic dec done. The fix is trivial, though, so it still makes sense to review the rest. > + io_req_find_next(req, &nxt); > + if (last_ref) > __io_free_req(req); > - } > + > + return nxt; > +} > + > +static void io_put_req_async_submission(struct io_kiocb *req, > + struct io_wq_work **workptr) > +{ > + static struct io_kiocb *nxt; > + > + nxt = io_put_req_submission(req); > + if (nxt) > + io_wq_assign_next(workptr, nxt); > } > > static void io_put_req(struct io_kiocb *req) > @@ -1979,8 +2014,11 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, > > static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) > { > + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); > + > switch (ret) { > case -EIOCBQUEUED: > + req->flags |= REQ_F_DONT_STEAL_NEXT; > break; > case -ERESTARTSYS: > case -ERESTARTNOINTR: > @@ -2526,6 +2564,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) > if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC)) > return -EINVAL; > > + req->flags |= REQ_F_DONT_STEAL_NEXT; > req->sync.off = READ_ONCE(sqe->off); > req->sync.len = READ_ONCE(sqe->len); > return 0; > @@ -2543,27 +2582,6 @@ static bool io_req_cancelled(struct io_kiocb *req) > return false; > } > > -static void io_link_work_cb(struct io_wq_work **workptr) > -{ > - struct io_wq_work *work = *workptr; > - struct io_kiocb *link = work->data; > - > - io_queue_linked_timeout(link); > - io_wq_submit_work(workptr); > 
-} > - > -static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) > -{ > - struct io_kiocb *link; > - > - *workptr = &nxt->work; > - link = io_prep_linked_timeout(nxt); > - if (link) { > - nxt->work.func = io_link_work_cb; > - nxt->work.data = link; > - } > -} > - > static void __io_fsync(struct io_kiocb *req) > { > loff_t end = req->sync.off + req->sync.len; > @@ -2581,14 +2599,11 @@ static void __io_fsync(struct io_kiocb *req) > static void io_fsync_finish(struct io_wq_work **workptr) > { > struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); > - struct io_kiocb *nxt = NULL; > > if (io_req_cancelled(req)) > return; > __io_fsync(req); > - io_put_req(req); /* drop submission reference */ > - if (nxt) > - io_wq_assign_next(workptr, nxt); > + io_put_req_async_submission(req, workptr); > } > > static int io_fsync(struct io_kiocb *req, bool force_nonblock) > @@ -2617,14 +2632,11 @@ static void __io_fallocate(struct io_kiocb *req) > static void io_fallocate_finish(struct io_wq_work **workptr) > { > struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); > - struct io_kiocb *nxt = NULL; > > if (io_req_cancelled(req)) > return; > __io_fallocate(req); > - io_put_req(req); /* drop submission reference */ > - if (nxt) > - io_wq_assign_next(workptr, nxt); > + io_put_req_async_submission(req, workptr); > } > > static int io_fallocate_prep(struct io_kiocb *req, > @@ -2988,13 +3000,10 @@ static void __io_close_finish(struct io_kiocb *req) > static void io_close_finish(struct io_wq_work **workptr) > { > struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); > - struct io_kiocb *nxt = NULL; > > /* not cancellable, don't do io_req_cancelled() */ > __io_close_finish(req); > - io_put_req(req); /* drop submission reference */ > - if (nxt) > - io_wq_assign_next(workptr, nxt); > + io_put_req_async_submission(req, workptr); > } > > static int io_close(struct io_kiocb *req, bool force_nonblock) > @@ -3016,6 +3025,7 @@ 
static int io_close(struct io_kiocb *req, bool force_nonblock) > */ > io_queue_async_work(req); > /* submission ref will be dropped, take it for async */ > + req->flags |= REQ_F_DONT_STEAL_NEXT; > refcount_inc_not_zero(&req->refs); > return 0; > } > @@ -3062,14 +3072,11 @@ static void __io_sync_file_range(struct io_kiocb *req) > static void io_sync_file_range_finish(struct io_wq_work **workptr) > { > struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); > - struct io_kiocb *nxt = NULL; > > if (io_req_cancelled(req)) > return; > __io_sync_file_range(req); > - io_put_req(req); /* put submission ref */ > - if (nxt) > - io_wq_assign_next(workptr, nxt); > + io_put_req_async_submission(req, workptr); > } > > static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) > @@ -3435,14 +3442,11 @@ static int __io_accept(struct io_kiocb *req, bool force_nonblock) > static void io_accept_finish(struct io_wq_work **workptr) > { > struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); > - struct io_kiocb *nxt = NULL; > > if (io_req_cancelled(req)) > return; > __io_accept(req, false); > - io_put_req(req); /* drop submission reference */ > - if (nxt) > - io_wq_assign_next(workptr, nxt); > + io_put_req_async_submission(req, workptr); > } > #endif > > @@ -3859,9 +3863,10 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) > hash_del(&req->hash_node); > io_poll_complete(req, req->result, 0); > req->flags |= REQ_F_COMP_LOCKED; > - io_put_req_find_next(req, nxt); > spin_unlock_irq(&ctx->completion_lock); > > + req->flags &= ~REQ_F_DONT_STEAL_NEXT; > + *nxt = io_put_req_submission(req); > io_cqring_ev_posted(ctx); > } > > @@ -3944,6 +3949,8 @@ static int io_poll_add(struct io_kiocb *req) > io_cqring_ev_posted(ctx); > io_put_req(req); > } > + > + req->flags |= REQ_F_DONT_STEAL_NEXT; > return ipt.error; > } > > @@ -4066,6 +4073,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, > if 
(flags & ~IORING_TIMEOUT_ABS) > return -EINVAL; > > + req->flags |= REQ_F_DONT_STEAL_NEXT; > req->timeout.count = READ_ONCE(sqe->off); > > if (!req->io && io_alloc_async_ctx(req)) > @@ -4680,7 +4688,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr) > { > struct io_wq_work *work = *workptr; > struct io_kiocb *req = container_of(work, struct io_kiocb, work); > - struct io_kiocb *nxt = NULL; > int ret = 0; > > /* if NO_CANCEL is set, we must still run the work */ > @@ -4709,9 +4716,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr) > io_put_req(req); > } > > - io_put_req(req); /* drop submission reference */ > - if (nxt) > - io_wq_assign_next(workptr, nxt); > + io_put_req_async_submission(req, workptr); > } > > static int io_req_needs_file(struct io_kiocb *req, int fd) > @@ -4935,10 +4940,6 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) > } > > err: > - nxt = NULL; > - /* drop submission reference */ > - io_put_req_find_next(req, &nxt); > - > if (linked_timeout) { > if (!ret) > io_queue_linked_timeout(linked_timeout); > @@ -4952,6 +4953,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) > req_set_fail_links(req); > io_put_req(req); > } > + > + nxt = io_put_req_submission(req); > if (nxt) { > req = nxt; > > -- Pavel Begunkov