Re: [PATCH V8 04/11] blk-mq: assign rq->tag in blk_mq_get_driver_tag

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 4/24/20 12:23 PM, Ming Lei wrote:
Especially for none elevator, rq->tag is assigned after the request is
allocated, so there isn't any way to figure out if one request is in
being dispatched. Also the code path wrt. driver tag becomes a bit
difference between none and io scheduler.

When one hctx becomes inactive, we have to prevent any request from
being dispatched to LLD. And get driver tag provides one perfect chance
to do that. Meantime we can drain any such requests by checking if
rq->tag is assigned.


Sorry for being a bit dense, but I'm having a hard time following the description.
Maybe this would be a bit clearer:

When one hctx becomes inactive, we do have to prevent any request from
being dispatched to the LLD. If we intercept them in blk_mq_get_tag() we can also drain all those requests which have no rq->tag assigned.

(With the nice side effect that if above paragraph is correct I've also got it right what the patch is trying to do :-)

So only assign rq->tag until blk_mq_get_driver_tag() is called.

This way also simplifies code of dealing with driver tag a lot.

Cc: Bart Van Assche <bvanassche@xxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: John Garry <john.garry@xxxxxxxxxx>
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
  block/blk-flush.c | 18 ++----------
  block/blk-mq.c    | 75 ++++++++++++++++++++++++-----------------------
  block/blk-mq.h    | 21 +++++++------
  block/blk.h       |  5 ----
  4 files changed, 51 insertions(+), 68 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index c7f396e3d5e2..977edf95d711 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -236,13 +236,8 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
  		error = fq->rq_status;
hctx = flush_rq->mq_hctx;
-	if (!q->elevator) {
-		blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
-		flush_rq->tag = -1;
-	} else {
-		blk_mq_put_driver_tag(flush_rq);
-		flush_rq->internal_tag = -1;
-	}
+	flush_rq->internal_tag = -1;
+	blk_mq_put_driver_tag(flush_rq);
running = &fq->flush_queue[fq->flush_running_idx];
  	BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);
@@ -317,14 +312,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
  	flush_rq->mq_ctx = first_rq->mq_ctx;
  	flush_rq->mq_hctx = first_rq->mq_hctx;
- if (!q->elevator) {
-		fq->orig_rq = first_rq;
-		flush_rq->tag = first_rq->tag;
-		blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq);
-	} else {
-		flush_rq->internal_tag = first_rq->internal_tag;
-	}
-
+	flush_rq->internal_tag = first_rq->internal_tag;
  	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
  	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
  	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 79267f2e8960..65f0aaed55ff 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -276,18 +276,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
  	struct request *rq = tags->static_rqs[tag];
  	req_flags_t rq_flags = 0;
- if (data->flags & BLK_MQ_REQ_INTERNAL) {
-		rq->tag = -1;
-		rq->internal_tag = tag;
-	} else {
-		if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
-			rq_flags = RQF_MQ_INFLIGHT;
-			atomic_inc(&data->hctx->nr_active);
-		}
-		rq->tag = tag;
-		rq->internal_tag = -1;
-		data->hctx->tags->rqs[rq->tag] = rq;
-	}
+	rq->internal_tag = tag;
+	rq->tag = -1;
/* csd/requeue_work/fifo_time is initialized before use */
  	rq->q = data->q;
@@ -472,14 +462,18 @@ static void __blk_mq_free_request(struct request *rq)
  	struct request_queue *q = rq->q;
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
-	const int sched_tag = rq->internal_tag;
blk_pm_mark_last_busy(rq);
  	rq->mq_hctx = NULL;
-	if (rq->tag != -1)
-		blk_mq_put_tag(hctx->tags, ctx, rq->tag);
-	if (sched_tag != -1)
-		blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
+
+	if (hctx->sched_tags) {
+		if (rq->tag >= 0)
+			blk_mq_put_tag(hctx->tags, ctx, rq->tag);
+		blk_mq_put_tag(hctx->sched_tags, ctx, rq->internal_tag);
+	} else {
+		blk_mq_put_tag(hctx->tags, ctx, rq->internal_tag);
+        }
+
  	blk_mq_sched_restart(hctx);
  	blk_queue_exit(q);
  }
@@ -527,7 +521,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
  		blk_stat_add(rq, now);
  	}
- if (rq->internal_tag != -1)
+	if (rq->q->elevator && rq->internal_tag != -1)
  		blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now);

One really does wonder: under which circumstances can 'internal_tag' be -1 now ? The hunk above seems to imply that 'internal_tag' is now always be set; and this is also the impression I got from reading this patch.
Care to elaborate?

@@ -1027,33 +1021,40 @@ static inline unsigned int queued_to_index(unsigned int queued)
  	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
  }
-static bool blk_mq_get_driver_tag(struct request *rq)
+static bool __blk_mq_get_driver_tag(struct request *rq)
  {
  	struct blk_mq_alloc_data data = {
-		.q = rq->q,
-		.hctx = rq->mq_hctx,
-		.flags = BLK_MQ_REQ_NOWAIT,
-		.cmd_flags = rq->cmd_flags,
+		.q		= rq->q,
+		.hctx		= rq->mq_hctx,
+		.flags		= BLK_MQ_REQ_NOWAIT,
+		.cmd_flags	= rq->cmd_flags,
  	};
-	bool shared;
- if (rq->tag != -1)
-		return true;
+	if (data.hctx->sched_tags) {
+		if (blk_mq_tag_is_reserved(data.hctx->sched_tags,
+				rq->internal_tag))
+			data.flags |= BLK_MQ_REQ_RESERVED;
+		rq->tag = blk_mq_get_tag(&data);
+	} else {
+		rq->tag = rq->internal_tag;
+	}
- if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
-		data.flags |= BLK_MQ_REQ_RESERVED;
+	if (rq->tag == -1)
+		return false;
- shared = blk_mq_tag_busy(data.hctx);
-	rq->tag = blk_mq_get_tag(&data);
-	if (rq->tag >= 0) {
-		if (shared) {
-			rq->rq_flags |= RQF_MQ_INFLIGHT;
-			atomic_inc(&data.hctx->nr_active);
-		}
-		data.hctx->tags->rqs[rq->tag] = rq;
+	if (blk_mq_tag_busy(data.hctx)) {
+		rq->rq_flags |= RQF_MQ_INFLIGHT;
+		atomic_inc(&data.hctx->nr_active);
  	}
+	data.hctx->tags->rqs[rq->tag] = rq;
+	return true;
+}
- return rq->tag != -1;
+static bool blk_mq_get_driver_tag(struct request *rq)
+{
+	if (rq->tag != -1)
+		return true;
+	return __blk_mq_get_driver_tag(rq);
  }
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e7d1da4b1f73..d0c72d7d07c8 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -196,26 +196,25 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
  	return true;
  }
-static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
-					   struct request *rq)
+static inline void blk_mq_put_driver_tag(struct request *rq)
  {
-	blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
+	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+	int tag = rq->tag;
+
+	if (tag < 0)
+		return;
+
  	rq->tag = -1;
   > +	if (hctx->sched_tags)
+		blk_mq_put_tag(hctx->tags, rq->mq_ctx, tag);
+
I wonder if you need the local variable 'tag' here; might it not be better to set 'rq->tag' to '-1' after the call to put_tag?

But then I'm not sure, either, so it might just be cosmetic ..

  	if (rq->rq_flags & RQF_MQ_INFLIGHT) {
  		rq->rq_flags &= ~RQF_MQ_INFLIGHT;
  		atomic_dec(&hctx->nr_active);
  	}
  }
-static inline void blk_mq_put_driver_tag(struct request *rq)
-{
-	if (rq->tag == -1 || rq->internal_tag == -1)
-		return;
-
-	__blk_mq_put_driver_tag(rq->mq_hctx, rq);
-}
-
  static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
  {
  	int cpu;
diff --git a/block/blk.h b/block/blk.h
index bbbced0b3c8c..446b2893b478 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -26,11 +26,6 @@ struct blk_flush_queue {
  	struct list_head	flush_data_in_flight;
  	struct request		*flush_rq;
- /*
-	 * flush_rq shares tag with this rq, both can't be active
-	 * at the same time
-	 */
-	struct request		*orig_rq;
  	struct lock_class_key	key;
  	spinlock_t		mq_flush_lock;
  };

Cheers,

Hannes
--
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare@xxxxxxx                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux