From: Jens Axboe <axboe@xxxxxxxxx> commit 79f720a751cad613620d0237e3b44f89f4a69181 upstream Currently we are inconsistent in when we decide to run the queue. Using blk_mq_run_hw_queues() we check if the hctx has pending IO before running it, but we don't do that from the individual queue run function, blk_mq_run_hw_queue(). This results in a lot of extra and pointless queue runs, potentially, on flush requests and (much worse) on tag starvation situations. This is observable just looking at top output, with lots of kworkers active. For the !async runs, it just adds to the CPU overhead of blk-mq. Move the has-pending check into the run function instead of having callers do it. Reviewed-by: Christoph Hellwig <hch@xxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> Signed-off-by: Jack Wang <jinpu.wang@xxxxxxxxxxxxxxxx> --- block/blk-mq-sched.c | 7 +------ block/blk-mq.c | 18 +++++++++++------- block/blk-mq.h | 2 -- include/linux/blk-mq.h | 2 +- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 6f4bdb8209f7..c117bd8fd1f6 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -81,12 +81,7 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) } else clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) { - blk_mq_run_hw_queue(hctx, true); - return true; - } - - return false; + return blk_mq_run_hw_queue(hctx, true); } /* diff --git a/block/blk-mq.c b/block/blk-mq.c index 7b45f20ae7c1..b3f15d9c7c5a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -60,10 +60,10 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) /* * Check if any of the ctx's have pending work in this hardware queue */ -bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) +static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { - return sbitmap_any_bit_set(&hctx->ctx_map) || - !list_empty_careful(&hctx->dispatch) || + return !list_empty_careful(&hctx->dispatch) || + sbitmap_any_bit_set(&hctx->ctx_map) || blk_mq_sched_has_work(hctx); } @@ -1261,9 +1261,14 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) } EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { - __blk_mq_delay_run_hw_queue(hctx, async, 0); + if (blk_mq_hctx_has_pending(hctx)) { + __blk_mq_delay_run_hw_queue(hctx, async, 0); + return true; + } + + return false; } EXPORT_SYMBOL(blk_mq_run_hw_queue); @@ -1273,8 +1278,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async) int i; queue_for_each_hw_ctx(q, hctx, i) { - if (!blk_mq_hctx_has_pending(hctx) || - blk_mq_hctx_stopped(hctx)) + if (blk_mq_hctx_stopped(hctx)) continue; blk_mq_run_hw_queue(hctx, async); diff --git a/block/blk-mq.h b/block/blk-mq.h index a5c2b46951e4..1b849a163e9d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -27,14 +27,12 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); -bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, bool wait); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index cc51f940aec4..d0162ea97078 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -261,7 +261,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); +bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, -- 2.7.4