[PATCH V7 8/9] blk-mq: handle requests dispatched from IO scheduler in case of inactive hctx

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



If one hctx becomes inactive when its CPUs are all offline, all in-queue
requests aimed at this hctx have to be re-submitted.

Re-submit requests from either the sw queue or the scheduler queue when
the hctx is found to be inactive.

Cc: John Garry <john.garry@xxxxxxxxxx>
Cc: Bart Van Assche <bvanassche@xxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
 block/blk-mq.c | 100 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 62 insertions(+), 38 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ae1e57c64ca1..54ba8a9c3c93 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2456,6 +2456,52 @@ static void blk_mq_resubmit_io(struct request *rq)
 		blk_mq_resubmit_fs_io(rq);
 }
 
+static void blk_mq_hctx_deactivate(struct blk_mq_hw_ctx *hctx)
+{
+	LIST_HEAD(sched_tmp);	/* dispatched rqs that belong to another hctx */
+	LIST_HEAD(re_submit);	/* rqs handed to blk_mq_resubmit_io() below */
+	LIST_HEAD(flush_in);
+	LIST_HEAD(flush_out);
+	struct request *rq, *nxt;
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (!e) {
+		blk_mq_flush_busy_ctxs(hctx, &re_submit);
+	} else {
+		while ((rq = e->type->ops.dispatch_request(hctx))) {
+			if (rq->mq_hctx != hctx)
+				list_add(&rq->queuelist, &sched_tmp);
+			else
+				list_add(&rq->queuelist, &re_submit);
+		}
+	}
+	while (!list_empty(&sched_tmp)) {
+		rq = list_entry(sched_tmp.next, struct request,
+				queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_sched_insert_request(rq, true, true, true);
+	}
+
+	/* requests already on hctx->dispatch must be re-submitted as well */
+	spin_lock(&hctx->lock);
+	list_splice_tail_init(&hctx->dispatch, &re_submit);
+	spin_unlock(&hctx->lock);
+
+	/* RQF_FLUSH_SEQ requests go through blk_end_flush_machinery() first */
+	list_for_each_entry_safe(rq, nxt, &re_submit, queuelist) {
+		if (rq->rq_flags & RQF_FLUSH_SEQ)
+			list_move(&rq->queuelist, &flush_in);
+	}
+	blk_end_flush_machinery(hctx, &flush_in, &flush_out);
+	list_splice_tail(&flush_out, &re_submit);
+
+	while (!list_empty(&re_submit)) {
+		rq = list_first_entry(&re_submit, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_resubmit_io(rq);
+	}
+}
+
 /*
  * 'cpu' has gone away. If this hctx is inactive, we can't dispatch request
  * to the hctx any more, so steal bios from requests of this hctx, and
@@ -2463,54 +2509,32 @@ static void blk_mq_resubmit_io(struct request *rq)
  */
 static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 {
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	LIST_HEAD(tmp);
-	enum hctx_type type;
+	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
+			struct blk_mq_hw_ctx, cpuhp_dead);
 
 	if (!cpumask_test_cpu(cpu, hctx->cpumask))
 		return 0;
 
-	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
-	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
-	type = hctx->type;
-
-	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_lists[type])) {
-		list_splice_init(&ctx->rq_lists[type], &tmp);
-		blk_mq_hctx_clear_pending(hctx, ctx);
-	}
-	spin_unlock(&ctx->lock);
+	if (test_bit(BLK_MQ_S_INACTIVE, &hctx->state)) {
+		blk_mq_hctx_deactivate(hctx);
+	} else if (!hctx->queue->elevator) {
+		struct blk_mq_ctx *ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+		enum hctx_type type = hctx->type;
+		LIST_HEAD(tmp);
+
+		spin_lock(&ctx->lock);
+		if (!list_empty(&ctx->rq_lists[type])) {
+			list_splice_init(&ctx->rq_lists[type], &tmp);
+			blk_mq_hctx_clear_pending(hctx, ctx);
+		}
+		spin_unlock(&ctx->lock);
 
-	if (!test_bit(BLK_MQ_S_INACTIVE, &hctx->state)) {
 		if (!list_empty(&tmp)) {
 			spin_lock(&hctx->lock);
 			list_splice_tail_init(&tmp, &hctx->dispatch);
 			spin_unlock(&hctx->lock);
-			blk_mq_run_hw_queue(hctx, true);
-		}
-	} else {
-		LIST_HEAD(flush_in);
-		LIST_HEAD(flush_out);
-		struct request *rq, *nxt;
 
-		/* requests in dispatch list have to be re-submitted too */
-		spin_lock(&hctx->lock);
-		list_splice_tail_init(&hctx->dispatch, &tmp);
-		spin_unlock(&hctx->lock);
-
-		/* blk_end_flush_machinery will cover flush request */
-		list_for_each_entry_safe(rq, nxt, &tmp, queuelist) {
-			if (rq->rq_flags & RQF_FLUSH_SEQ)
-				list_move(&rq->queuelist, &flush_in);
-		}
-		blk_end_flush_machinery(hctx, &flush_in, &flush_out);
-		list_splice_tail(&flush_out, &tmp);
-
-		while (!list_empty(&tmp)) {
-			rq = list_first_entry(&tmp, struct request, queuelist);
-			list_del_init(&rq->queuelist);
-			blk_mq_resubmit_io(rq);
+			blk_mq_run_hw_queue(hctx, true);
 		}
 	}
 
-- 
2.25.2




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux