On Mon, Jun 29, 2020 at 10:04:52AM +0100, Christoph Hellwig wrote: > > +/* > > + * We know bfq and deadline apply single scheduler queue instead of multi > > + * queue. However, the two are often used on single queue devices, also > > + * the current @hctx should affect the real device status most of times > > + * because of locality principle. > > + * > > + * So use current hctx->dispatch_busy directly for figuring out batching > > + * dispatch count. > > + */ > > I don't really understand this comment. Also I think the code might > be cleaner if this function is inlined as an if/else in the only > caller. > > > +static inline bool blk_mq_do_dispatch_rq_lists(struct blk_mq_hw_ctx *hctx, > > + struct list_head *lists, bool multi_hctxs, unsigned count) > > +{ > > + bool ret; > > + > > + if (!count) > > + return false; > > + > > + if (likely(!multi_hctxs)) > > + return blk_mq_dispatch_rq_list(hctx, lists, count); > > Keeping these checks in the callers would keep things a little cleaner, > especially as the multi hctx case only really needs the lists argument. > > > + LIST_HEAD(list); > > + struct request *new, *rq = list_first_entry(lists, > > + struct request, queuelist); > > + unsigned cnt = 0; > > + > > + list_for_each_entry(new, lists, queuelist) { > > + if (new->mq_hctx != rq->mq_hctx) > > + break; > > + cnt++; > > + } > > + > > + if (new->mq_hctx == rq->mq_hctx) > > + list_splice_tail_init(lists, &list); > > + else > > + list_cut_before(&list, lists, &new->queuelist); > > + > > + ret = blk_mq_dispatch_rq_list(rq->mq_hctx, &list, cnt); > > + } > > I think this has two issues: for one ret should be ORed as any dispatch > or error should leaave ret set. Also we need to splice the dispatch OK. > list back onto the main list here, otherwise we can lose those requests. The dispatch list always becomes empty after blk_mq_dispatch_rq_list() returns, so no need to splice it back. > > FYI, while reviewing this I ended up editing things into a shape I > could better understand. Let me know what you think of this version? > > > diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c > index 4c72073830f3cb..466dce99699ae4 100644 > --- a/block/blk-mq-sched.c > +++ b/block/blk-mq-sched.c > @@ -7,6 +7,7 @@ > #include <linux/kernel.h> > #include <linux/module.h> > #include <linux/blk-mq.h> > +#include <linux/list_sort.h> > > #include <trace/events/block.h> > > @@ -80,6 +81,38 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) > blk_mq_run_hw_queue(hctx, true); > } > > +static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b) > +{ > + struct request *rqa = container_of(a, struct request, queuelist); > + struct request *rqb = container_of(b, struct request, queuelist); > + > + return rqa->mq_hctx > rqb->mq_hctx; > +} > + > +static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list) > +{ > + struct blk_mq_hw_ctx *hctx = > + list_first_entry(rq_list, struct request, queuelist)->mq_hctx; > + struct request *rq; > + LIST_HEAD(hctx_list); > + unsigned int count = 0; > + bool ret; > + > + list_for_each_entry(rq, rq_list, queuelist) { > + if (rq->mq_hctx != hctx) { > + list_cut_before(&hctx_list, rq_list, &rq->queuelist); > + goto dispatch; > + } > + count++; > + } > + list_splice_tail_init(rq_list, &hctx_list); > + > +dispatch: > + ret = blk_mq_dispatch_rq_list(hctx, &hctx_list, count); > + list_splice(&hctx_list, rq_list); The above line isn't needed. > + return ret; > +} > + > #define BLK_MQ_BUDGET_DELAY 3 /* ms units */ > > /* > @@ -90,20 +123,29 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) > * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to > * be run again. This is necessary to avoid starving flushes. > */ > -static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) > +static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) The return type can be changed to 'bool'. > { > struct request_queue *q = hctx->queue; > struct elevator_queue *e = q->elevator; > + bool multi_hctxs = false, run_queue = false; > + bool dispatched = false, busy = false; > + unsigned int max_dispatch; > LIST_HEAD(rq_list); > - int ret = 0; > - struct request *rq; > + int count = 0; > + > + if (hctx->dispatch_busy) > + max_dispatch = 1; > + else > + max_dispatch = hctx->queue->nr_requests; > > do { > + struct request *rq; > + > if (e->type->ops.has_work && !e->type->ops.has_work(hctx)) > break; > > if (!list_empty_careful(&hctx->dispatch)) { > - ret = -EAGAIN; > + busy = true; > break; > } > > @@ -120,7 +162,7 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) > * no guarantee anyone will kick the queue. Kick it > * ourselves. > */ > - blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY); > + run_queue = true; > break; > } > > @@ -130,7 +172,43 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) > * in blk_mq_dispatch_rq_list(). > */ > list_add(&rq->queuelist, &rq_list); The above should change to list_add_tail(&rq->queuelist, &rq_list). Thanks, Ming