Re: [PATCH 3/8] io_uring/rw: implement vectored registered rw

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Mar 3, 2025 at 7:50 AM Pavel Begunkov <asml.silence@xxxxxxxxx> wrote:
>
> Implement registered buffer vectored reads and writes with new opcodes
> IORING_OP_WRITEV_FIXED and IORING_OP_READV_FIXED.
>
> Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
> ---
>  include/uapi/linux/io_uring.h |  2 ++
>  io_uring/opdef.c              | 39 +++++++++++++++++++++++++++
>  io_uring/rw.c                 | 51 +++++++++++++++++++++++++++++++++++
>  io_uring/rw.h                 |  2 ++
>  4 files changed, 94 insertions(+)
>
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 1e02e94bc26d..9dd384b369ee 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -280,6 +280,8 @@ enum io_uring_op {
>         IORING_OP_BIND,
>         IORING_OP_LISTEN,
>         IORING_OP_RECV_ZC,
> +       IORING_OP_READV_FIXED,
> +       IORING_OP_WRITEV_FIXED,
>
>         /* this goes last, obviously */
>         IORING_OP_LAST,
> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
> index 9511262c513e..6655d2cbf74d 100644
> --- a/io_uring/opdef.c
> +++ b/io_uring/opdef.c
> @@ -529,6 +529,35 @@ const struct io_issue_def io_issue_defs[] = {
>                 .prep                   = io_eopnotsupp_prep,
>  #endif
>         },
> +       [IORING_OP_READV_FIXED] = {
> +               .needs_file             = 1,
> +               .unbound_nonreg_file    = 1,
> +               .pollin                 = 1,
> +               .plug                   = 1,
> +               .audit_skip             = 1,
> +               .ioprio                 = 1,
> +               .iopoll                 = 1,
> +               .iopoll_queue           = 1,
> +               .vectored               = 1,
> +               .async_size             = sizeof(struct io_async_rw),
> +               .prep                   = io_prep_readv_fixed,
> +               .issue                  = io_read,
> +       },
> +       [IORING_OP_WRITEV_FIXED] = {
> +               .needs_file             = 1,
> +               .hash_reg_file          = 1,
> +               .unbound_nonreg_file    = 1,
> +               .pollout                = 1,
> +               .plug                   = 1,
> +               .audit_skip             = 1,
> +               .ioprio                 = 1,
> +               .iopoll                 = 1,
> +               .iopoll_queue           = 1,
> +               .vectored               = 1,
> +               .async_size             = sizeof(struct io_async_rw),
> +               .prep                   = io_prep_writev_fixed,
> +               .issue                  = io_write,
> +       },
>  };
>
>  const struct io_cold_def io_cold_defs[] = {
> @@ -761,6 +790,16 @@ const struct io_cold_def io_cold_defs[] = {
>         [IORING_OP_RECV_ZC] = {
>                 .name                   = "RECV_ZC",
>         },
> +       [IORING_OP_READV_FIXED] = {
> +               .name                   = "READV_FIXED",
> +               .cleanup                = io_readv_writev_cleanup,
> +               .fail                   = io_rw_fail,
> +       },
> +       [IORING_OP_WRITEV_FIXED] = {
> +               .name                   = "WRITEV_FIXED",
> +               .cleanup                = io_readv_writev_cleanup,
> +               .fail                   = io_rw_fail,
> +       },
>  };
>
>  const char *io_uring_get_opcode(u8 opcode)
> diff --git a/io_uring/rw.c b/io_uring/rw.c
> index ad7f647d48e9..4c4229f41aaa 100644
> --- a/io_uring/rw.c
> +++ b/io_uring/rw.c
> @@ -381,6 +381,57 @@ int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>         return __io_prep_rw(req, sqe, ITER_SOURCE);
>  }
>
> +static int io_rw_prep_reg_vec(struct io_kiocb *req, int ddir)
> +{
> +       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> +       struct io_async_rw *io = req->async_data;
> +       const struct iovec __user *uvec;
> +       size_t uvec_segs = rw->len;
> +       struct iovec *iov;
> +       int iovec_off, ret;
> +       void *res;
> +
> +       if (uvec_segs > io->vec.nr) {
> +               ret = io_vec_realloc(&io->vec, uvec_segs);
> +               if (ret)
> +                       return ret;
> +               req->flags |= REQ_F_NEED_CLEANUP;
> +       }
> +       /* pad iovec to the right */
> +       iovec_off = io->vec.nr - uvec_segs;
> +       iov = io->vec.iovec + iovec_off;
> +       uvec = u64_to_user_ptr(rw->addr);
> +       res = iovec_from_user(uvec, uvec_segs, uvec_segs, iov,
> +                             io_is_compat(req->ctx));
> +       if (IS_ERR(res))
> +               return PTR_ERR(res);
> +
> +       ret = io_import_reg_vec(ddir, &io->iter, req, &io->vec,
> +                               uvec_segs, iovec_off, 0);

So the iovecs are imported at prep time rather than at issue time?
I suppose that, since only user registered buffers are allowed and not
kernel bvecs, you aren't concerned about interactions with the ublk
bvec register/unregister operations? In principle, though, I think the
difference between prep and issue time is still observable if the same
registered buffer index is used alternately for user and kernel
registered buffers.

Best,
Caleb

> +       iov_iter_save_state(&io->iter, &io->iter_state);
> +       return ret;
> +}
> +
> +int io_prep_readv_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +       int ret;
> +
> +       ret = __io_prep_rw(req, sqe, ITER_DEST);
> +       if (unlikely(ret))
> +               return ret;
> +       return io_rw_prep_reg_vec(req, ITER_DEST);
> +}
> +
> +int io_prep_writev_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +       int ret;
> +
> +       ret = __io_prep_rw(req, sqe, ITER_SOURCE);
> +       if (unlikely(ret))
> +               return ret;
> +       return io_rw_prep_reg_vec(req, ITER_SOURCE);
> +}
> +
>  /*
>   * Multishot read is prepared just like a normal read/write request, only
>   * difference is that we set the MULTISHOT flag.
> diff --git a/io_uring/rw.h b/io_uring/rw.h
> index e86a3858f48b..475b6306a316 100644
> --- a/io_uring/rw.h
> +++ b/io_uring/rw.h
> @@ -34,6 +34,8 @@ struct io_async_rw {
>
>  int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
>  int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
> +int io_prep_readv_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
> +int io_prep_writev_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
>  int io_prep_readv(struct io_kiocb *req, const struct io_uring_sqe *sqe);
>  int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe);
>  int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe);
> --
> 2.48.1
>
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux