On Wed, Jul 27, 2022 at 11:09:13AM +0800, Ziyang Zhang wrote: > On 2022/7/27 10:05, Ming Lei wrote: > > On Tue, Jul 26, 2022 at 07:44:05PM +0800, ZiyangZhang wrote: > >> UBLK_IO_NEED_GET_DATA is one ublk IO command. It is designed for a user > >> application who wants to allocate IO buffer and set IO buffer address > >> only after it receives an IO request from ublksrv. This is a reasonable > >> scenario because these users may use a RPC framework as one IO backend > >> to handle IO requests passed from ublksrv. And a RPC framework may > >> allocate its own buffer(or memory pool). > >> > >> This new feature (UBLK_F_NEED_GET_DATA) is optional for ublk users. > >> Related userspace code has been added in ublksrv[1] as one pull request. > >> > >> Test cases for this feature are added in ublksrv and all the tests pass. > >> The performance result shows that this new feature does bring additional > >> latency because one IO is issued back to ublk_drv once again to copy data > >> from bio vectors to user-provided data buffer. UBLK_IO_NEED_GET_DATA is > >> suitable for bigger block size such as 512B or 1MB. > >> > >> [1] https://github.com/ming1/ubdsrv > >> > >> Signed-off-by: ZiyangZhang <ZiyangZhang@xxxxxxxxxxxxxxxxx> > >> --- > >> drivers/block/ublk_drv.c | 88 ++++++++++++++++++++++++++++++++++++---- > >> 1 file changed, 79 insertions(+), 9 deletions(-) > >> > >> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > >> index 255b2de46a24..c2d2cd5ab25c 100644 > >> --- a/drivers/block/ublk_drv.c > >> +++ b/drivers/block/ublk_drv.c > >> @@ -47,7 +47,9 @@ > >> #define UBLK_MINORS (1U << MINORBITS) > >> > >> /* All UBLK_F_* have to be included into UBLK_F_ALL */ > >> -#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_URING_CMD_COMP_IN_TASK) > >> +#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \ > >> + | UBLK_F_URING_CMD_COMP_IN_TASK \ > >> + | UBLK_F_NEED_GET_DATA) > >> > >> struct ublk_rq_data { > >> struct callback_head work; > >> @@ -86,6 +88,15 @@ struct ublk_uring_cmd_pdu { > >> */ > >> #define UBLK_IO_FLAG_ABORTED 0x04 > >> > >> +/* > >> + * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires > >> + * get data buffer address from ublksrv. > >> + * > >> + * Then, bio data could be copied into this data buffer for a WRITE request > >> + * after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is unset. > >> + */ > >> +#define UBLK_IO_FLAG_NEED_GET_DATA 0x08 > >> + > >> struct ublk_io { > >> /* userspace buffer address from io cmd */ > >> __u64 addr; > >> @@ -163,11 +174,19 @@ static struct miscdevice ublk_misc; > >> static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) > >> { > >> if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) && > >> + !(ubq->flags & UBLK_F_NEED_GET_DATA) && > >> !(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK)) > >> return true; > >> return false; > > > > NEED_GET_DATA isn't related with using task work. > > > > That said if use task work return true, you use task work to handle > > GET_DATA, otherwise use io_uring_cmd_complete_in_task() to handle > > it, then either we use task work or use io_uring_cmd_complete_in_task, > > not mix the two. > > > > BTW, in my test, it is observed reliably that task work can get better iops. > > Ok, I will check whether UBLK_F_NEED_GET_DATA works with task_work_add(). > > I find that checking flags of ublk_io must stay in current ubq process. > Otherwise there may be potential data race while aborting IOs. > So whatever I use NEED_GET_DATA or not, task work is necessary. Yeah. > > Using task_work_add() or io_uring_cmd_complete_in_task() depends on: > (1) the module is built-in or not > (2) the user requires the feature: UBLK_F_URING_CMD_COMP_IN_TASK > > In the future, in our case, we may choose > UBLK_F_URING_CMD_COMP_IN_TASK | UBLK_F_NEED_GET_DATA as an option. That is fine, what I meant is that UBLK_F_NEED_GET_DATA doesn't have to require UBLK_F_URING_CMD_COMP_IN_TASK logically. > > > > >> } > >> > >> +static inline bool ublk_need_get_data(const struct ublk_queue *ubq) > >> +{ > >> + if (ubq->flags & UBLK_F_NEED_GET_DATA) > >> + return true; > >> + return false; > >> +} > >> + > >> static struct ublk_device *ublk_get_device(struct ublk_device *ub) > >> { > >> if (kobject_get_unless_zero(&ub->cdev_dev.kobj)) > >> @@ -509,6 +528,21 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req) > >> } > >> } > >> > >> +static void ubq_complete_io_cmd(struct ublk_io *io, int res) > >> +{ > >> + /* mark this cmd owned by ublksrv */ > >> + io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV; > >> + > >> + /* > >> + * clear ACTIVE since we are done with this sqe/cmd slot > >> + * We can only accept io cmd in case of being not active. > >> + */ > >> + io->flags &= ~UBLK_IO_FLAG_ACTIVE; > >> + > >> + /* tell ublksrv one io request is coming */ > >> + io_uring_cmd_done(io->cmd, res, 0); > >> +} > >> + > >> #define UBLK_REQUEUE_DELAY_MS 3 > >> > >> static inline void __ublk_rq_task_work(struct request *req) > >> @@ -531,6 +565,20 @@ static inline void __ublk_rq_task_work(struct request *req) > >> return; > >> } > >> > >> + if (ublk_need_get_data(ubq) && > >> + (req_op(req) == REQ_OP_WRITE || > >> + req_op(req) == REQ_OP_FLUSH) && > >> + !(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) { > >> + > >> + io->flags |= UBLK_IO_FLAG_NEED_GET_DATA; > >> + > >> + pr_devel("%s: ublk_need_get_data. op %d, qid %d tag %d io_flags %x\n", > >> + __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags); > >> + > >> + ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA); > >> + return; > >> + } > >> + > >> mapped_bytes = ublk_map_io(ubq, req, io); > >> > >> /* partially mapped, update io descriptor */ > >> @@ -553,17 +601,13 @@ static inline void __ublk_rq_task_work(struct request *req) > >> mapped_bytes >> 9; > >> } > >> > >> - /* mark this cmd owned by ublksrv */ > >> - io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV; > >> - > >> /* > >> - * clear ACTIVE since we are done with this sqe/cmd slot > >> - * We can only accept io cmd in case of being not active. > >> + * Anyway, we have handled UBLK_IO_NEED_GET_DATA for WRITE/FLUSH requests, > >> + * or we did nothing for other types requests. > >> */ > >> - io->flags &= ~UBLK_IO_FLAG_ACTIVE; > >> + io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA; > >> > >> - /* tell ublksrv one io request is coming */ > >> - io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0); > >> + ubq_complete_io_cmd(io, UBLK_IO_RES_OK); > >> } > >> > >> static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) > >> @@ -846,6 +890,16 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) > >> mutex_unlock(&ub->mutex); > >> } > >> > >> +static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, > >> + int tag, struct io_uring_cmd *cmd) > >> +{ > >> + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); > >> + struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); > >> + > >> + pdu->req = req; > >> + io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); > >> +} > > > > Here you can choose to use task work or io_uring_cmd_complete_in_task() by > > checking ublk_can_use_task_work(), just like what ublk_queue_rq() does. > > OK, I will add a check on ublk_can_use_task_work(). > > > > >> + > >> static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) > >> { > >> struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd; > >> @@ -884,6 +938,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) > >> goto out; > >> } > >> > >> + /* > >> + * ensure that the user issues UBLK_IO_NEED_GET_DATA > >> + * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA. > >> + */ > >> + if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) > >> + ^ (cmd_op == UBLK_IO_NEED_GET_DATA)) > >> + goto out; > >> + > >> switch (cmd_op) { > >> case UBLK_IO_FETCH_REQ: > >> /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ > >> @@ -917,6 +979,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) > >> io->cmd = cmd; > >> ublk_commit_completion(ub, ub_cmd); > >> break; > >> + case UBLK_IO_NEED_GET_DATA: > >> + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) > >> + goto out; > >> + io->addr = ub_cmd->addr; > >> + io->cmd = cmd; > >> + io->flags |= UBLK_IO_FLAG_ACTIVE; > >> + ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd); > > > > You still need to clear UBLK_IO_FLAG_OWNED_BY_SRV here. > > If ublk_drv gets one UBLK_IO_NEED_GET_DATA command, it should immediately call > io_uring_cmd_complete_in_task() and handling NEED_GET_DATA logic in the task work. > > Since UBLK_IO_FLAG_OWNED_BY_SRV means the "slot" is owned by nbd driver or ublk driver, > I don't think UBLK_IO_FLAG_OWNED_BY_SRV should be cleared. OK, fine. > > BTW, UBLK_IO_FLAG_OWNED_BY_SRV is set in the task work finally. > > > > In future, UBLK_IO_FLAG_OWNED_BY_SRV can be removed actually. > > Agree. UBLK_IO_FLAG_OWNED_BY_SRV should be integrated with UBLK_IO_FLAG_ACTIVE. > > Now I am trying to implement crash recovery mechanism and I concern about memory order IO flags should be only touched in ubq daemon context, so no need to worry about memory order. > while operating these IO flags. IMO, too many IO flags looks dangerous > and I made mistakes on flags while developing UBLK_NEED_GET_DATA. :( There are just 3 commands if GET_DATA is added, and each io command's state is very limited, so there shouldn't be too many io flags. Thanks, Ming