On 11/22/22 00:56, Ming Lei wrote:
> Either ublk_can_use_task_work() is true or not, io commands are
> forwarded to ublk server in reverse order, since llist_add() is
> always to add one element to the head of the list.
> 
> Even though block layer doesn't guarantee request dispatch order,
> requests should be sent to hardware in the sequence order generated
> from io scheduler, which usually considers the request's LBA, and
> order is often important for HDD.
> 
> So forward io commands in the sequence made from io scheduler by
> aligning task work with current io_uring command's batch handling,
> and it has been observed that both can get similar performance data
> if IORING_SETUP_COOP_TASKRUN is set from ublk server.
> 
> Reported-by: Andreas Hindborg <andreas.hindborg@xxxxxxx>
> Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>

Looks ok to me.

Reviewed-by: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>

> ---
>  drivers/block/ublk_drv.c | 82 +++++++++++++++++++---------------------
>  1 file changed, 38 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index f96cb01e9604..e9de9d846b73 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -57,10 +57,8 @@
>  #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
>  
>  struct ublk_rq_data {
> -        union {
> -                struct callback_head work;
> -                struct llist_node node;
> -        };
> +        struct llist_node node;
> +        struct callback_head work;
>  };
>  
>  struct ublk_uring_cmd_pdu {
> @@ -766,15 +764,31 @@ static inline void __ublk_rq_task_work(struct request *req)
>          ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
>  }
>  
> +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq)
> +{
> +        struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
> +        struct ublk_rq_data *data, *tmp;
> +
> +        io_cmds = llist_reverse_order(io_cmds);
> +        llist_for_each_entry_safe(data, tmp, io_cmds, node)
> +                __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
> +}
> +
> +static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
> +{
> +        struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
> +        struct ublk_rq_data *data, *tmp;
> +
> +        llist_for_each_entry_safe(data, tmp, io_cmds, node)
> +                __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
> +}
> +
>  static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
>  {
>          struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
>          struct ublk_queue *ubq = pdu->ubq;
> -        struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
> -        struct ublk_rq_data *data;
>  
> -        llist_for_each_entry(data, io_cmds, node)
> -                __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
> +        ublk_forward_io_cmds(ubq);
>  }
>  
>  static void ublk_rq_task_work_fn(struct callback_head *work)
> @@ -782,14 +796,20 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
>          struct ublk_rq_data *data = container_of(work,
>                          struct ublk_rq_data, work);
>          struct request *req = blk_mq_rq_from_pdu(data);
> +        struct ublk_queue *ubq = req->mq_hctx->driver_data;
>  
> -        __ublk_rq_task_work(req);
> +        ublk_forward_io_cmds(ubq);
>  }
>  
> -static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
> +static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
>  {
> -        struct ublk_io *io = &ubq->ios[rq->tag];
> +        struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> +        struct ublk_io *io;
>  
> +        if (!llist_add(&data->node, &ubq->io_cmds))
> +                return;
> +
> +        io = &ubq->ios[rq->tag];
>          /*
>           * If the check pass, we know that this is a re-issued request aborted
>           * previously in monitor_work because the ubq_daemon(cmd's task) is
> @@ -803,11 +823,11 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
>           * guarantees that here is a re-issued request aborted previously.
>           */
>          if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
> -                struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
> -                struct ublk_rq_data *data;
> -
> -                llist_for_each_entry(data, io_cmds, node)
> -                        __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
> +                ublk_abort_io_cmds(ubq);
> +        } else if (ublk_can_use_task_work(ubq)) {
> +                if (task_work_add(ubq->ubq_daemon, &data->work,
> +                                        TWA_SIGNAL_NO_IPI))
> +                        ublk_abort_io_cmds(ubq);
>          } else {
>                  struct io_uring_cmd *cmd = io->cmd;
>                  struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
> @@ -817,23 +837,6 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
>          }
>  }
>  
> -static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq,
> -                bool last)
> -{
> -        struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> -
> -        if (ublk_can_use_task_work(ubq)) {
> -                enum task_work_notify_mode notify_mode = last ?
> -                        TWA_SIGNAL_NO_IPI : TWA_NONE;
> -
> -                if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
> -                        __ublk_abort_rq(ubq, rq);
> -        } else {
> -                if (llist_add(&data->node, &ubq->io_cmds))
> -                        ublk_submit_cmd(ubq, rq);
> -        }
> -}
> -
>  static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
>                  const struct blk_mq_queue_data *bd)
>  {
> @@ -865,19 +868,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
>                  return BLK_STS_OK;
>          }
>  
> -        ublk_queue_cmd(ubq, rq, bd->last);
> +        ublk_queue_cmd(ubq, rq);
>  
>          return BLK_STS_OK;
>  }
>  
> -static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx)
> -{
> -        struct ublk_queue *ubq = hctx->driver_data;
> -
> -        if (ublk_can_use_task_work(ubq))
> -                __set_notify_signal(ubq->ubq_daemon);
> -}
> -
>  static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
>                  unsigned int hctx_idx)
>  {
> @@ -899,7 +894,6 @@ static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
>  
>  static const struct blk_mq_ops ublk_mq_ops = {
>          .queue_rq = ublk_queue_rq,
> -        .commit_rqs = ublk_commit_rqs,
>          .init_hctx = ublk_init_hctx,
>          .init_request = ublk_init_rq,
>  };
> @@ -1197,7 +1191,7 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
>          struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
>          struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
>  
> -        ublk_queue_cmd(ubq, req, true);
> +        ublk_queue_cmd(ubq, req);
>  }
>  
>  static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)

-- 
Damien Le Moal
Western Digital Research
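
P.S. A quick illustration of the ordering issue the commit message describes:
llist_add() always inserts at the head of the lockless list, so commands queued
as 0, 1, 2, 3 come back from llist_del_all() as 3, 2, 1, 0, and
llist_reverse_order() puts them back into submission order before dispatch.
The sketch below is a minimal userspace model of that behavior (plain C, not
the kernel llist implementation; all names in it are illustrative only):

#include <stdio.h>
#include <stdlib.h>

struct node {
        int tag;                /* stands in for the request tag */
        struct node *next;
};

/* Push at the head, the way llist_add() does. */
static void push_head(struct node **head, struct node *n)
{
        n->next = *head;
        *head = n;
}

/* Reverse the singly linked chain, the way llist_reverse_order() does. */
static struct node *reverse(struct node *head)
{
        struct node *prev = NULL;

        while (head) {
                struct node *next = head->next;

                head->next = prev;
                prev = head;
                head = next;
        }
        return prev;
}

int main(void)
{
        struct node *head = NULL;
        struct node *n;
        int i;

        /* "Submit" tags 0..3 in order; head insertion stores them LIFO. */
        for (i = 0; i < 4; i++) {
                n = malloc(sizeof(*n));
                if (!n)
                        return 1;
                n->tag = i;
                push_head(&head, n);
        }

        printf("walk without reversal:");
        for (n = head; n; n = n->next)
                printf(" %d", n->tag);          /* prints 3 2 1 0 */
        printf("\n");

        head = reverse(head);
        printf("walk after reversal: ");
        for (n = head; n; n = n->next)
                printf(" %d", n->tag);          /* prints 0 1 2 3 */
        printf("\n");
        return 0;
}

The kernel's llist_add() does the head insertion with a cmpxchg() loop so that
producers need no lock, which is why a single reversal pass on the consumer
side is the cheap way to recover submission order.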