Supposing the following scenario with a virtio_blk driver.

CPU0                                        CPU1

blk_mq_try_issue_directly()
  __blk_mq_issue_directly()
    q->mq_ops->queue_rq()
      virtio_queue_rq()
        blk_mq_stop_hw_queue()
                                            virtblk_done()
  blk_mq_request_bypass_insert()              blk_mq_start_stopped_hw_queues()
  /* Add IO request to dispatch list */
                                 1) store       blk_mq_start_stopped_hw_queue()
                                                  clear_bit(BLK_MQ_S_STOPPED)        3) store
  blk_mq_run_hw_queue()                           blk_mq_run_hw_queue()
    if (!blk_mq_hctx_has_pending())                 if (!blk_mq_hctx_has_pending())  4) load
      return                                          return
    blk_mq_sched_dispatch_requests()                blk_mq_sched_dispatch_requests()
      if (blk_mq_hctx_stopped())  2) load             if (blk_mq_hctx_stopped())
        return                                          return
      __blk_mq_sched_dispatch_requests()            __blk_mq_sched_dispatch_requests()

A full memory barrier should be inserted between 1) and 2), as well as
between 3) and 4), to make sure that either CPU0 sees that
BLK_MQ_S_STOPPED is cleared, or CPU1 sees the dispatch list (or the
setting of the bitmap of the software queue). Otherwise, either CPU
will not re-run the hardware queue, causing starvation.

Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
 block/blk-mq.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b2d0f22de0c7f..6f18993b8f454 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2075,6 +2075,13 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 	 * in blk_mq_sched_restart(). Avoid restart code path to
 	 * miss the new added requests to hctx->dispatch, meantime
 	 * SCHED_RESTART is observed here.
+	 *
+	 * This barrier is also used to order adding of dispatch list
+	 * above and the test of BLK_MQ_S_STOPPED in the following
+	 * routine (in blk_mq_delay_run_hw_queue()). Pairs with the
+	 * barrier in blk_mq_start_stopped_hw_queue(). So dispatch code
+	 * could either see BLK_MQ_S_STOPPED is cleared or dispatch list
+	 * to avoid missing dispatching requests.
 	 */
 	smp_mb();
 
@@ -2237,6 +2244,17 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	if (!need_run)
 		return;
 
+	/*
+	 * This barrier is used to order adding of dispatch list or setting
+	 * of bitmap of any software queue outside of this function and the
+	 * test of BLK_MQ_S_STOPPED in the following routine. Pairs with the
+	 * barrier in blk_mq_start_stopped_hw_queue(). So dispatch code could
+	 * either see BLK_MQ_S_STOPPED is cleared or dispatch list or setting
+	 * of bitmap of any software queue to avoid missing dispatching
+	 * requests.
+	 */
+	smp_mb();
+
 	if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
 		blk_mq_delay_run_hw_queue(hctx, 0);
 		return;
@@ -2392,6 +2410,13 @@ void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 		return;
 
 	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+	/*
+	 * Pairs with the smp_mb() in blk_mq_run_hw_queue() or
+	 * blk_mq_dispatch_rq_list() to order the clearing of
+	 * BLK_MQ_S_STOPPED and the test of dispatch list or
+	 * bitmap of any software queue.
+	 */
+	smp_mb__after_atomic();
 	blk_mq_run_hw_queue(hctx, async);
 }
 EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
-- 
2.20.1