Sorry, I sent these comments on v3 to you directly and forgot to CC the list. Copying them here. On Tue, Feb 18, 2025 at 2:43 PM Keith Busch <kbusch@xxxxxxxx> wrote: > > From: Keith Busch <kbusch@xxxxxxxxxx> > > Provide new operations for the user to request mapping an active request > to an io uring instance's buf_table. The user has to provide the index > it wants to install the buffer. > > A reference count is taken on the request to ensure it can't be > completed while it is active in a ring's buf_table. > > Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx> > --- > drivers/block/ublk_drv.c | 137 +++++++++++++++++++++++++--------- > include/uapi/linux/ublk_cmd.h | 4 + > 2 files changed, 105 insertions(+), 36 deletions(-) > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > index 529085181f355..0c753176b14e9 100644 > --- a/drivers/block/ublk_drv.c > +++ b/drivers/block/ublk_drv.c > @@ -51,6 +51,9 @@ > /* private ioctl command mirror */ > #define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC) > > +#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF) > +#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF) > + > /* All UBLK_F_* have to be included into UBLK_F_ALL */ > #define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \ > | UBLK_F_URING_CMD_COMP_IN_TASK \ > @@ -201,7 +204,7 @@ static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, > int tag); > static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) > { > - return ub->dev_info.flags & UBLK_F_USER_COPY; > + return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); > } > > static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) > @@ -581,7 +584,7 @@ static void ublk_apply_params(struct ublk_device *ub) > > static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) > { > - return ubq->flags & UBLK_F_USER_COPY; > + return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); > } > > static 
inline bool ublk_need_req_ref(const struct ublk_queue *ubq) > @@ -1747,6 +1750,96 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd, > io_uring_cmd_mark_cancelable(cmd, issue_flags); > } > > +static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, > + struct ublk_queue *ubq, int tag, size_t offset) > +{ > + struct request *req; > + > + if (!ublk_need_req_ref(ubq)) > + return NULL; > + > + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); > + if (!req) > + return NULL; > + > + if (!ublk_get_req_ref(ubq, req)) > + return NULL; > + > + if (unlikely(!blk_mq_request_started(req) || req->tag != tag)) > + goto fail_put; > + > + if (!ublk_rq_has_data(req)) > + goto fail_put; > + > + if (offset > blk_rq_bytes(req)) > + goto fail_put; > + > + return req; > +fail_put: > + ublk_put_req_ref(ubq, req); > + return NULL; > +} > + > +static void ublk_io_release(void *priv) > +{ > + struct request *rq = priv; > + struct ublk_queue *ubq = rq->mq_hctx->driver_data; > + > + ublk_put_req_ref(ubq, rq); > +} > + > +static int ublk_register_io_buf(struct io_uring_cmd *cmd, > + struct ublk_queue *ubq, int tag, > + const struct ublksrv_io_cmd *ub_cmd, > + unsigned int issue_flags) > +{ > + struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx; > + struct ublk_device *ub = cmd->file->private_data; > + int index = (int)ub_cmd->addr, ret; Make index an unsigned to match io_buffer_register_bvec()? Same comment for ublk_unregister_io_buf(). > + struct ublk_rq_data *data; > + struct request *req; > + > + if (!ub) > + return -EPERM; __ublk_ch_uring_cmd() has already dereferenced ub = cmd->file->private_data, so how is it possible to hit this? Same comment for ublk_unregister_io_buf(). > + > + req = __ublk_check_and_get_req(ub, ubq, tag, 0); Consider moving the offset > blk_rq_bytes(req) check from __ublk_check_and_get_req() to ublk_check_and_get_req() so we don't need to pass an unused offset here. 
> + if (!req) > + return -EINVAL; > + > + data = blk_mq_rq_to_pdu(req); data appears unused in this function. Same comment for ublk_unregister_io_buf(). > + ret = io_buffer_register_bvec(ctx, req, ublk_io_release, index, > + issue_flags); > + if (ret) { > + ublk_put_req_ref(ubq, req); > + return ret; > + } > + > + return 0; > +} > + > +static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, > + struct ublk_queue *ubq, int tag, > + const struct ublksrv_io_cmd *ub_cmd, > + unsigned int issue_flags) Make tag an unsigned to match __ublk_ch_uring_cmd() and blk_mq_tag_to_rq()? Same comment for ublk_register_io_buf(). > +{ > + struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx; > + struct ublk_device *ub = cmd->file->private_data; > + int index = (int)ub_cmd->addr; > + struct ublk_rq_data *data; > + struct request *req; > + > + if (!ub) > + return -EPERM; > + > + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); > + if (!req) > + return -EINVAL; > + > + data = blk_mq_rq_to_pdu(req); > + io_buffer_unregister_bvec(ctx, index, issue_flags); Should we check that the registered bvec actually corresponds to this ublk request? Otherwise, I don't see a reason for the unregister command to involve the ublk request at all. Perhaps a generic io_uring "unregister buffer index" operation similar to IORING_OP_FILES_UPDATE would make more sense? 
Best, Caleb > + return 0; > +} > + > static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, > unsigned int issue_flags, > const struct ublksrv_io_cmd *ub_cmd) > @@ -1798,6 +1891,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, > > ret = -EINVAL; > switch (_IOC_NR(cmd_op)) { > + case UBLK_IO_REGISTER_IO_BUF: > + return ublk_register_io_buf(cmd, ubq, tag, ub_cmd, issue_flags); > + case UBLK_IO_UNREGISTER_IO_BUF: > + return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd, > + issue_flags); > case UBLK_IO_FETCH_REQ: > /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ > if (ublk_queue_ready(ubq)) { > @@ -1872,36 +1970,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, > return -EIOCBQUEUED; > } > > -static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, > - struct ublk_queue *ubq, int tag, size_t offset) > -{ > - struct request *req; > - > - if (!ublk_need_req_ref(ubq)) > - return NULL; > - > - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); > - if (!req) > - return NULL; > - > - if (!ublk_get_req_ref(ubq, req)) > - return NULL; > - > - if (unlikely(!blk_mq_request_started(req) || req->tag != tag)) > - goto fail_put; > - > - if (!ublk_rq_has_data(req)) > - goto fail_put; > - > - if (offset > blk_rq_bytes(req)) > - goto fail_put; > - > - return req; > -fail_put: > - ublk_put_req_ref(ubq, req); > - return NULL; > -} > - > static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, > unsigned int issue_flags) > { > @@ -2527,9 +2595,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) > goto out_free_dev_number; > } > > - /* We are not ready to support zero copy */ > - ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; > - > ub->dev_info.nr_hw_queues = min_t(unsigned int, > ub->dev_info.nr_hw_queues, nr_cpu_ids); > ublk_align_max_io_size(ub); > @@ -2860,7 +2925,7 @@ static int ublk_ctrl_get_features(struct io_uring_cmd *cmd) > { > const struct ublksrv_ctrl_cmd *header = 
io_uring_sqe_cmd(cmd->sqe); > void __user *argp = (void __user *)(unsigned long)header->addr; > - u64 features = UBLK_F_ALL & ~UBLK_F_SUPPORT_ZERO_COPY; > + u64 features = UBLK_F_ALL; > > if (header->len != UBLK_FEATURES_LEN || !header->addr) > return -EINVAL; > diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h > index a8bc98bb69fce..74246c926b55f 100644 > --- a/include/uapi/linux/ublk_cmd.h > +++ b/include/uapi/linux/ublk_cmd.h > @@ -94,6 +94,10 @@ > _IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd) > #define UBLK_U_IO_NEED_GET_DATA \ > _IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd) > +#define UBLK_U_IO_REGISTER_IO_BUF \ > + _IOWR('u', 0x23, struct ublksrv_io_cmd) > +#define UBLK_U_IO_UNREGISTER_IO_BUF \ > + _IOWR('u', 0x24, struct ublksrv_io_cmd) > > /* only ABORT means that no re-fetch */ > #define UBLK_IO_RES_OK 0 > -- > 2.43.5 >