When all CPUs in one hctx are offline and this hctx becomes inactive, we shouldn't run this hw queue to complete requests any more. Instead, steal the bios from each such request, resubmit them, and finally free the request in blk_mq_hctx_notify_dead(). Cc: John Garry <john.garry@xxxxxxxxxx> Cc: Bart Van Assche <bvanassche@xxxxxxx> Cc: Hannes Reinecke <hare@xxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Keith Busch <keith.busch@xxxxxxxxx> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq.c | 58 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6f9d2f5e0b53..3e52ba74661e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2287,10 +2287,34 @@ static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node) return 0; } +static void blk_mq_resubmit_io(struct request *rq) +{ + struct bio_list list; + struct bio *bio; + + bio_list_init(&list); + blk_steal_bios(&list, rq); + + /* + * Free the old empty request before submitting bio to avoid a + * potential deadlock + */ + blk_mq_cleanup_rq(rq); + blk_mq_end_request(rq, 0); + + while (true) { + bio = bio_list_pop(&list); + if (!bio) + break; + + generic_make_request(bio); + } +} + /* - * 'cpu' is going away. splice any existing rq_list entries from this - * software queue to the hw queue dispatch list, and ensure that it - * gets run. + * 'cpu' has gone away. If this hctx is inactive, we can't dispatch requests + * to the hctx any more, so steal bios from requests of this hctx, and + * re-submit them to the request queue, and finally free these requests. 
*/ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) { @@ -2310,16 +2334,28 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) } spin_unlock(&ctx->lock); - clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); - - if (list_empty(&tmp)) - return 0; + if (!test_bit(BLK_MQ_S_INACTIVE, &hctx->state)) { + if (!list_empty(&tmp)) { + spin_lock(&hctx->lock); + list_splice_tail_init(&tmp, &hctx->dispatch); + spin_unlock(&hctx->lock); + blk_mq_run_hw_queue(hctx, true); + } + } else { + /* requests in dispatch list have to be re-submitted too */ + spin_lock(&hctx->lock); + list_splice_tail_init(&hctx->dispatch, &tmp); + spin_unlock(&hctx->lock); - spin_lock(&hctx->lock); - list_splice_tail_init(&tmp, &hctx->dispatch); - spin_unlock(&hctx->lock); + while (!list_empty(&tmp)) { + struct request *rq = list_entry(tmp.next, + struct request, queuelist); + list_del_init(&rq->queuelist); + blk_mq_resubmit_io(rq); + } + clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); + } - blk_mq_run_hw_queue(hctx, true); return 0; } -- 2.20.1