Re: [PATCH V6 6/6] blk-mq: support batching dispatch in case of io scheduler

Ming Lei <ming.lei@xxxxxxxxxx> · Mon, 29 Jun 2020 18:32:39 +0800

On Mon, Jun 29, 2020 at 10:04:52AM +0100, Christoph Hellwig wrote:
> > +/*
> > + * We know bfq and deadline apply single scheduler queue instead of multi
> > + * queue. However, the two are often used on single queue devices, also
> > + * the current @hctx should affect the real device status most of times
> > + * because of locality principle.
> > + *
> > + * So use current hctx->dispatch_busy directly for figuring out batching
> > + * dispatch count.
> > + */
> 
> I don't really understand this comment.  Also I think the code might
> be cleaner if this function is inlined as an if/else in the only
> caller.
> 
> > +static inline bool blk_mq_do_dispatch_rq_lists(struct blk_mq_hw_ctx *hctx,
> > +		struct list_head *lists, bool multi_hctxs, unsigned count)
> > +{
> > +	bool ret;
> > +
> > +	if (!count)
> > +		return false;
> > +
> > +	if (likely(!multi_hctxs))
> > +		return blk_mq_dispatch_rq_list(hctx, lists, count);
> 
> Keeping these checks in the callers would keep things a little cleaner,
> especially as the multi hctx case only really needs the lists argument.
> 
> > +		LIST_HEAD(list);
> > +		struct request *new, *rq = list_first_entry(lists,
> > +				struct request, queuelist);
> > +		unsigned cnt = 0;
> > +
> > +		list_for_each_entry(new, lists, queuelist) {
> > +			if (new->mq_hctx != rq->mq_hctx)
> > +				break;
> > +			cnt++;
> > +		}
> > +
> > +		if (new->mq_hctx == rq->mq_hctx)
> > +			list_splice_tail_init(lists, &list);
> > +		else
> > +			list_cut_before(&list, lists, &new->queuelist);
> > +
> > +		ret = blk_mq_dispatch_rq_list(rq->mq_hctx, &list, cnt);
> > +	}
> 
> I think this has two issues:  for one ret should be ORed as any dispatch
> or error should leaave ret set.  Also we need to splice the dispatch

OK.

> list back onto the main list here, otherwise we can lose those requests.

The dispatch list always becomes empty after blk_mq_dispatch_rq_list()
returns, so no need to splice it back.

> 
> FYI, while reviewing this I ended up editing things into a shape I
> could better understand.  Let me know what you think of this version?
> 
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 4c72073830f3cb..466dce99699ae4 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -7,6 +7,7 @@
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/blk-mq.h>
> +#include <linux/list_sort.h>
>  
>  #include <trace/events/block.h>
>  
> @@ -80,6 +81,38 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
>  	blk_mq_run_hw_queue(hctx, true);
>  }
>  
> +static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
> +{
> +	struct request *rqa = container_of(a, struct request, queuelist);
> +	struct request *rqb = container_of(b, struct request, queuelist);
> +
> +	return rqa->mq_hctx > rqb->mq_hctx;
> +}
> +
> +static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
> +{
> +	struct blk_mq_hw_ctx *hctx =
> +		list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
> +	struct request *rq;
> +	LIST_HEAD(hctx_list);
> +	unsigned int count = 0;
> +	bool ret;
> +
> +	list_for_each_entry(rq, rq_list, queuelist) {
> +		if (rq->mq_hctx != hctx) {
> +			list_cut_before(&hctx_list, rq_list, &rq->queuelist);
> +			goto dispatch;
> +		}
> +		count++;
> +	}
> +	list_splice_tail_init(rq_list, &hctx_list);
> +
> +dispatch:
> +	ret = blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
> +	list_splice(&hctx_list, rq_list);

The above line isn't needed.

> +	return ret;
> +}
> +
>  #define BLK_MQ_BUDGET_DELAY	3		/* ms units */
>  
>  /*
> @@ -90,20 +123,29 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
>   * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
>   * be run again.  This is necessary to avoid starving flushes.
>   */
> -static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
> +static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)

The return type can be changed to 'bool'.

>  {
>  	struct request_queue *q = hctx->queue;
>  	struct elevator_queue *e = q->elevator;
> +	bool multi_hctxs = false, run_queue = false;
> +	bool dispatched = false, busy = false;
> +	unsigned int max_dispatch;
>  	LIST_HEAD(rq_list);
> -	int ret = 0;
> -	struct request *rq;
> +	int count = 0;
> +
> +	if (hctx->dispatch_busy)
> +		max_dispatch = 1;
> +	else
> +		max_dispatch = hctx->queue->nr_requests;
>  
>  	do {
> +		struct request *rq;
> +
>  		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
>  			break;
>  
>  		if (!list_empty_careful(&hctx->dispatch)) {
> -			ret = -EAGAIN;
> +			busy = true;
>  			break;
>  		}
>  
> @@ -120,7 +162,7 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
>  			 * no guarantee anyone will kick the queue.  Kick it
>  			 * ourselves.
>  			 */
> -			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
> +			run_queue = true;
>  			break;
>  		}
>  
> @@ -130,7 +172,43 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
>  		 * in blk_mq_dispatch_rq_list().
>  		 */
>  		list_add(&rq->queuelist, &rq_list);

The above should change to list_add_tail(&rq->queuelist, &rq_list).

Thanks, 
Ming