If hardware queues are stopped for some event, like the device has been
suspended by power management, requests allocated on that hardware queue
are stuck indefinitely, causing a queue freeze to wait forever.

This patch abandons requests on stopped queues after syncing with all
queue_rq events when we need to rebalance the queues. While we would
prefer not to end the requests in error if it were possible to submit
them on a different context, there is no good way to unwind a request
for submission on a valid context once it enters a stopped context for
removal. Ending the IO with -EAGAIN is a better alternative than
deadlocking.

Reported-by: Marc Merlin <marc@xxxxxxxxxxx>
Signed-off-by: Keith Busch <keith.busch@xxxxxxxxx>
---
 block/blk-mq.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 67 insertions(+), 12 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9b7ed03..0c9a2a3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -117,22 +117,12 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
-/**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
- * @q: request queue.
- *
- * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
- */
-void blk_mq_quiesce_queue(struct request_queue *q)
+static void blk_mq_sync_queue(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i;
 	bool rcu = false;
 
-	blk_mq_stop_hw_queues(q);
-
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->flags & BLK_MQ_F_BLOCKING)
 			synchronize_srcu(&hctx->queue_rq_srcu);
@@ -142,6 +132,20 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 	if (rcu)
 		synchronize_rcu();
 }
+
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Additionally, it is not prevented that
+ * new queue_rq() calls occur unless the queue has been stopped first.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+	blk_mq_stop_hw_queues(q);
+	blk_mq_sync_queue(q);
+}
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
 void blk_mq_wake_waiters(struct request_queue *q)
@@ -2228,6 +2232,51 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 	blk_mq_sysfs_register(q);
 }
 
+static void blk_mq_abandon_stopped_requests(struct request_queue *q)
+{
+	int i;
+	struct request *rq, *next;
+	struct blk_mq_hw_ctx *hctx;
+	LIST_HEAD(rq_list);
+
+	blk_mq_sync_queue(q);
+
+	spin_lock(&q->requeue_lock);
+	list_for_each_entry_safe(rq, next, &q->requeue_list, queuelist) {
+		struct blk_mq_ctx *ctx;
+
+		ctx = rq->mq_ctx;
+		hctx = blk_mq_map_queue(q, ctx->cpu);
+		if (blk_mq_hctx_stopped(hctx)) {
+			list_del_init(&rq->queuelist);
+
+			spin_lock(&hctx->lock);
+			list_add_tail(&rq->queuelist, &rq_list);
+			spin_unlock(&hctx->lock);
+		}
+	}
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (!blk_mq_hctx_stopped(hctx))
+			continue;
+
+		flush_busy_ctxs(hctx, &rq_list);
+
+		spin_lock(&hctx->lock);
+		if (!list_empty(&hctx->dispatch))
+			list_splice_init(&hctx->dispatch, &rq_list);
+		spin_unlock(&hctx->lock);
+	}
+	spin_unlock(&q->requeue_lock);
+
+	while (!list_empty(&rq_list)) {
+		rq = list_first_entry(&rq_list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		rq->errors = -EAGAIN;
+		blk_mq_end_request(rq, rq->errors);
+	}
+}
+
 /*
  * New online cpumask which is going to be set in this hotplug event.
  * Declare this cpumasks as global as cpu-hotplug operation is invoked
@@ -2250,6 +2299,8 @@ static void blk_mq_queue_reinit_work(void)
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_freeze_queue_start(q);
 	list_for_each_entry(q, &all_q_list, all_q_node)
+		blk_mq_abandon_stopped_requests(q);
+	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_freeze_queue_wait(q);
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
@@ -2477,7 +2528,11 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 		return;
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue(q);
+		blk_mq_freeze_queue_start(q);
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_abandon_stopped_requests(q);
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_freeze_queue_wait(q);
 
 	set->nr_hw_queues = nr_hw_queues;
 	if (set->ops->map_queues)
--
2.5.5
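
For readers less familiar with blk-mq internals, here is a minimal,
self-contained userspace sketch, not kernel code, of the idea the patch
implements: once a hardware queue is stopped and will not run again, any
requests still parked on it are ended with -EAGAIN so that a pending queue
freeze can complete instead of deadlocking. All names below (toy_request,
toy_hw_queue, toy_abandon_stopped_requests) are hypothetical stand-ins for
the real blk-mq structures and helpers.

/* Illustrative toy model only; compiles standalone with a C compiler. */
#include <stdio.h>
#include <errno.h>

struct toy_request {
	int id;
	int error;			/* stands in for rq->errors */
	struct toy_request *next;
};

struct toy_hw_queue {
	int stopped;			/* stands in for blk_mq_hctx_stopped() */
	struct toy_request *dispatch;	/* requests parked on this queue */
};

/*
 * End every request parked on a stopped queue with -EAGAIN instead of
 * leaving it stuck, so a freeze waiting on those requests can finish.
 */
static void toy_abandon_stopped_requests(struct toy_hw_queue *hctx)
{
	struct toy_request *rq;

	if (!hctx->stopped)
		return;

	while ((rq = hctx->dispatch) != NULL) {
		hctx->dispatch = rq->next;
		rq->error = -EAGAIN;	/* better than deadlocking the freeze */
		printf("ended request %d with error %d\n", rq->id, rq->error);
	}
}

int main(void)
{
	struct toy_request r2 = { .id = 2, .next = NULL };
	struct toy_request r1 = { .id = 1, .next = &r2 };
	struct toy_hw_queue hctx = { .stopped = 1, .dispatch = &r1 };

	toy_abandon_stopped_requests(&hctx);
	return 0;
}

The final loop corresponds to the tail of blk_mq_abandon_stopped_requests
in the patch, where each abandoned request gets rq->errors = -EAGAIN and is
completed with blk_mq_end_request().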