Re: [PATCH 3/8] io_uring/rw: implement vectored registered rw

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Mar 3, 2025 at 3:01 PM Caleb Sander Mateos
<csander@xxxxxxxxxxxxxxx> wrote:
>
> On Mon, Mar 3, 2025 at 7:50 AM Pavel Begunkov <asml.silence@xxxxxxxxx> wrote:
> >
> > Implement registered buffer vectored reads with new opcodes
> > IORING_OP_WRITEV_FIXED and IORING_OP_READV_FIXED.
> >
> > Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
> > ---
> >  include/uapi/linux/io_uring.h |  2 ++
> >  io_uring/opdef.c              | 39 +++++++++++++++++++++++++++
> >  io_uring/rw.c                 | 51 +++++++++++++++++++++++++++++++++++
> >  io_uring/rw.h                 |  2 ++
> >  4 files changed, 94 insertions(+)
> >
> > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> > index 1e02e94bc26d..9dd384b369ee 100644
> > --- a/include/uapi/linux/io_uring.h
> > +++ b/include/uapi/linux/io_uring.h
> > @@ -280,6 +280,8 @@ enum io_uring_op {
> >         IORING_OP_BIND,
> >         IORING_OP_LISTEN,
> >         IORING_OP_RECV_ZC,
> > +       IORING_OP_READV_FIXED,
> > +       IORING_OP_WRITEV_FIXED,
> >
> >         /* this goes last, obviously */
> >         IORING_OP_LAST,
> > diff --git a/io_uring/opdef.c b/io_uring/opdef.c
> > index 9511262c513e..6655d2cbf74d 100644
> > --- a/io_uring/opdef.c
> > +++ b/io_uring/opdef.c
> > @@ -529,6 +529,35 @@ const struct io_issue_def io_issue_defs[] = {
> >                 .prep                   = io_eopnotsupp_prep,
> >  #endif
> >         },
> > +       [IORING_OP_READV_FIXED] = {
> > +               .needs_file             = 1,
> > +               .unbound_nonreg_file    = 1,
> > +               .pollin                 = 1,
> > +               .plug                   = 1,
> > +               .audit_skip             = 1,
> > +               .ioprio                 = 1,
> > +               .iopoll                 = 1,
> > +               .iopoll_queue           = 1,
> > +               .vectored               = 1,
> > +               .async_size             = sizeof(struct io_async_rw),
> > +               .prep                   = io_prep_readv_fixed,
> > +               .issue                  = io_read,
> > +       },
> > +       [IORING_OP_WRITEV_FIXED] = {
> > +               .needs_file             = 1,
> > +               .hash_reg_file          = 1,
> > +               .unbound_nonreg_file    = 1,
> > +               .pollout                = 1,
> > +               .plug                   = 1,
> > +               .audit_skip             = 1,
> > +               .ioprio                 = 1,
> > +               .iopoll                 = 1,
> > +               .iopoll_queue           = 1,
> > +               .vectored               = 1,
> > +               .async_size             = sizeof(struct io_async_rw),
> > +               .prep                   = io_prep_writev_fixed,
> > +               .issue                  = io_write,
> > +       },
> >  };
> >
> >  const struct io_cold_def io_cold_defs[] = {
> > @@ -761,6 +790,16 @@ const struct io_cold_def io_cold_defs[] = {
> >         [IORING_OP_RECV_ZC] = {
> >                 .name                   = "RECV_ZC",
> >         },
> > +       [IORING_OP_READV_FIXED] = {
> > +               .name                   = "READV_FIXED",
> > +               .cleanup                = io_readv_writev_cleanup,
> > +               .fail                   = io_rw_fail,
> > +       },
> > +       [IORING_OP_WRITEV_FIXED] = {
> > +               .name                   = "WRITEV_FIXED",
> > +               .cleanup                = io_readv_writev_cleanup,
> > +               .fail                   = io_rw_fail,
> > +       },
> >  };
> >
> >  const char *io_uring_get_opcode(u8 opcode)
> > diff --git a/io_uring/rw.c b/io_uring/rw.c
> > index ad7f647d48e9..4c4229f41aaa 100644
> > --- a/io_uring/rw.c
> > +++ b/io_uring/rw.c
> > @@ -381,6 +381,57 @@ int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> >         return __io_prep_rw(req, sqe, ITER_SOURCE);
> >  }
> >
> > +static int io_rw_prep_reg_vec(struct io_kiocb *req, int ddir)
> > +{
> > +       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> > +       struct io_async_rw *io = req->async_data;
> > +       const struct iovec __user *uvec;
> > +       size_t uvec_segs = rw->len;
> > +       struct iovec *iov;
> > +       int iovec_off, ret;
> > +       void *res;
> > +
> > +       if (uvec_segs > io->vec.nr) {
> > +               ret = io_vec_realloc(&io->vec, uvec_segs);
> > +               if (ret)
> > +                       return ret;
> > +               req->flags |= REQ_F_NEED_CLEANUP;
> > +       }
> > +       /* pad iovec to the right */
> > +       iovec_off = io->vec.nr - uvec_segs;
> > +       iov = io->vec.iovec + iovec_off;
> > +       uvec = u64_to_user_ptr(rw->addr);
> > +       res = iovec_from_user(uvec, uvec_segs, uvec_segs, iov,
> > +                             io_is_compat(req->ctx));
> > +       if (IS_ERR(res))
> > +               return PTR_ERR(res);
> > +
> > +       ret = io_import_reg_vec(ddir, &io->iter, req, &io->vec,
> > +                               uvec_segs, iovec_off, 0);
>
> So the iovecs are being imported at prep time rather than issue time?
> I suppose since only user registered buffers are allowed and not
> kernel bvecs, you aren't concerned about interactions with the ublk
> bvec register/unregister operations? I think in principle the
> difference between prep and issue time is still observable if the same
> registered buffer index is being used alternately for user and kernel
> registered buffers.

Never mind, I see you change this in the next patch.

Best,
Caleb





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux