On 18/06/2020 17:43, Jens Axboe wrote: > If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt > the buffered read to an io-wq worker. Instead we can rely on page > unlocking callbacks to support retry based async IO. This is a lot more > efficient than doing async thread offload. > > The retry is done similarly to how we handle poll based retry. From > the unlock callback, we simply queue the retry to a task_work based > handler. > > Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> > --- > fs/io_uring.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 137 insertions(+), 8 deletions(-) > ... > static int io_read(struct io_kiocb *req, bool force_nonblock) > { > struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; > @@ -2784,10 +2907,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock) > unsigned long nr_segs = iter.nr_segs; > ssize_t ret2 = 0; > > - if (req->file->f_op->read_iter) > - ret2 = call_read_iter(req->file, kiocb, &iter); > - else > - ret2 = loop_rw_iter(READ, req->file, kiocb, &iter); > + ret2 = io_iter_do_read(req, &iter); > > /* Catch -EAGAIN return for forced non-blocking submission */ > if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) { > @@ -2799,17 +2919,26 @@ static int io_read(struct io_kiocb *req, bool force_nonblock) > ret = io_setup_async_rw(req, io_size, iovec, > inline_vecs, &iter); > if (ret) > - goto out_free; > + goto out; > /* any defer here is final, must blocking retry */ > if (!(req->flags & REQ_F_NOWAIT) && > !file_can_poll(req->file)) > req->flags |= REQ_F_MUST_PUNT; > + /* if we can retry, do so with the callbacks armed */ > + if (io_rw_should_retry(req)) { > + ret2 = io_iter_do_read(req, &iter); > + if (ret2 == -EIOCBQUEUED) { > + goto out; > + } else if (ret2 != -EAGAIN) { > + kiocb_done(kiocb, ret2); > + goto out; > + } > + } > + kiocb->ki_flags &= ~IOCB_WAITQ; > return -EAGAIN; > } > } > -out_free: > - kfree(iovec); > - req->flags &= ~REQ_F_NEED_CLEANUP; This looks fishy. For instance, if it fails early on rw_verify_area(), how would it free yet on-stack iovec? Is it handled somehow? > +out: > return ret; > } > > -- Pavel Begunkov