On 4/24/20 12:23 PM, Ming Lei wrote:
Now a request is considered in-flight only when its state is updated to
MQ_RQ_IN_FLIGHT, which is done by dirver via blk_mq_start_request().
s/dirver/driver/
Actually, from blk-mq's view, one rq can be considered in-flight
once its tag is >= 0.
Well, that is something we should clarify to avoid any misunderstanding.
To my understanding, 'in-flight' requests are those which have been
submitted to the LLD, i.e. we'll have a lifetime rule like

  internal_tag >= tag > in-flight
If the existence of a 'tag' were equivalent to 'in-flight', we could
do away with all the convoluted code managing the MQ_RQ_IN_FLIGHT state,
couldn't we?
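To make this concrete, here is a minimal sketch of the two notions of
'in-flight' we're discussing (blk_mq_request_started() and rq->tag are
the real interfaces; the two helper names are mine, purely illustrative):

#include <linux/blk-mq.h>

static bool rq_in_flight_by_state(struct request *rq)
{
	/* state-based: MQ_RQ_IN_FLIGHT is only entered once the
	 * driver has called blk_mq_start_request() */
	return blk_mq_request_started(rq);
}

static bool rq_in_flight_by_tag(struct request *rq)
{
	/* tag-based: the rq owns a driver tag, whether or not the
	 * driver has started it yet */
	return rq->tag >= 0;
}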
Passing one rq filter callback so that we can iterate requests very
flexiable.
s/flexiable/flexible/
Meantime blk_mq_all_tag_busy_iter() is made public so that it can be
called from inside blk-mq.
Maybe:
Implement blk_mq_all_tag_busy_iter() which accepts a 'busy_fn' argument
to filter over which commands to iterate, and make the existing
blk_mq_tag_busy_iter() a wrapper for the new function.
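For reference, a caller-side sketch of the proposed interface (the
only_started filter and abort_rq handler are hypothetical, just to show
how @busy_rq_fn gates which requests reach @fn):

static bool only_started(struct request *rq, void *data, bool reserved)
{
	/* filter: let @fn see only requests the driver has started */
	return blk_mq_request_started(rq);
}

static bool abort_rq(struct request *rq, void *data, bool reserved)
{
	/* act on each request that passed the filter;
	 * returning true keeps the iteration going */
	return true;
}

/* with a struct blk_mq_tags *tags in scope: */
blk_mq_all_tag_busy_iter(tags, abort_rq, only_started, NULL);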
Cc: John Garry <john.garry@xxxxxxxxxx>
Cc: Bart Van Assche <bvanassche@xxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Reviewed-by: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
block/blk-mq-tag.c | 39 +++++++++++++++++++++++++++------------
block/blk-mq-tag.h | 4 ++++
2 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 586c9d6e904a..2e43b827c96d 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -255,6 +255,7 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
struct bt_tags_iter_data {
struct blk_mq_tags *tags;
busy_tag_iter_fn *fn;
+ busy_rq_iter_fn *busy_rq_fn;
void *data;
bool reserved;
};
@@ -274,7 +275,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* test and set the bit before assigning ->rqs[].
*/
rq = tags->rqs[bitnr];
- if (rq && blk_mq_request_started(rq))
+ if (rq && iter_data->busy_rq_fn(rq, iter_data->data, reserved))
return iter_data->fn(rq, iter_data->data, reserved);
return true;
@@ -294,11 +295,13 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* bitmap_tags member of struct blk_mq_tags.
*/
static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
- busy_tag_iter_fn *fn, void *data, bool reserved)
+ busy_tag_iter_fn *fn, busy_rq_iter_fn *busy_rq_fn,
+ void *data, bool reserved)
{
struct bt_tags_iter_data iter_data = {
.tags = tags,
.fn = fn,
+ .busy_rq_fn = busy_rq_fn,
.data = data,
.reserved = reserved,
};
@@ -310,19 +313,30 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
/**
* blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map
* @tags: Tag map to iterate over.
- * @fn: Pointer to the function that will be called for each started
- * request. @fn will be called as follows: @fn(rq, @priv,
- * reserved) where rq is a pointer to a request. 'reserved'
- * indicates whether or not @rq is a reserved request. Return
- * true to continue iterating tags, false to stop.
+ * @fn: Pointer to the function that will be called for each request
+ * when @busy_rq_fn(rq) returns true. @fn will be called as
+ * follows: @fn(rq, @priv, reserved) where rq is a pointer to a
+ * request. 'reserved' indicates whether or not @rq is a reserved
+ * request. Return true to continue iterating tags, false to stop.
+ * @busy_rq_fn: Pointer to the function that will be called for each request;
+ * it has the same type as @fn. @fn is only called on an rq when
+ * @busy_rq_fn(rq, @priv, reserved) returns true.
* @priv: Will be passed as second argument to @fn.
*/
-static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
- busy_tag_iter_fn *fn, void *priv)
+void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
+ busy_tag_iter_fn *fn, busy_rq_iter_fn *busy_rq_fn,
+ void *priv)
{
if (tags->nr_reserved_tags)
- bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
- bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
+ bt_tags_for_each(tags, &tags->breserved_tags, fn, busy_rq_fn,
+ priv, true);
+ bt_tags_for_each(tags, &tags->bitmap_tags, fn, busy_rq_fn, priv, false);
+}
+
+static bool blk_mq_default_busy_rq(struct request *rq, void *data,
+ bool reserved)
+{
+ return blk_mq_request_started(rq);
+}
/**
@@ -342,7 +356,8 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
for (i = 0; i < tagset->nr_hw_queues; i++) {
if (tagset->tags && tagset->tags[i])
- blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
+ blk_mq_all_tag_busy_iter(tagset->tags[i], fn,
+ blk_mq_default_busy_rq, priv);
}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 2b8321efb682..fdf095d513e5 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -21,6 +21,7 @@ struct blk_mq_tags {
struct list_head page_list;
};
+typedef bool (busy_rq_iter_fn)(struct request *, void *, bool);
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
@@ -34,6 +35,9 @@ extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv);
+void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
+ busy_tag_iter_fn *fn, busy_rq_iter_fn *busy_rq_fn,
+ void *priv);
static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
struct blk_mq_hw_ctx *hctx)
I do worry about the performance impact of this new filter function.
From my understanding, the _busy_iter() functions are supposed to be
efficient, such that they can be used as an alternative to having a
global atomic counter.
(cf the replacement of the global host_busy counter).
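For context, the counting pattern I mean looks roughly like this
(loosely modeled on scsi_host_busy(); the helper names here are
illustrative, not the actual SCSI code):

static bool count_in_flight(struct request *rq, void *data, bool reserved)
{
	unsigned int *count = data;

	(*count)++;
	return true;	/* keep iterating */
}

static unsigned int tagset_in_flight(struct blk_mq_tag_set *set)
{
	unsigned int count = 0;

	blk_mq_tagset_busy_iter(set, count_in_flight, &count);
	return count;
}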
But if we're adding ever more functionality to the iterator itself,
there's a good chance we'll kill the performance, rendering this
assumption invalid.
Have you measured the performance impact of this?
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
hare@xxxxxxx +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer