struct dentry *blk_debugfs_root;
+DEFINE_SRCU(blk_sched_srcu);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
@@ -412,8 +413,14 @@ void blk_cleanup_queue(struct request_queue *q)
* it is safe to free requests now.
*/
mutex_lock(&q->sysfs_lock);
- if (q->elevator)
+ if (q->elevator) {
+ /*
+ * Wait for all in-flight SRCU readers of hctx->tags->rqs[] to
+ * finish before the scheduler requests are freed.
+ */
+ synchronize_srcu(&blk_sched_srcu);
blk_mq_sched_free_requests(q);
+ }
mutex_unlock(&q->sysfs_lock);
percpu_ref_exit(&q->q_usage_counter);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 9c92053e704d..f3afaf1520cd 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -206,18 +206,24 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
struct blk_mq_tags *tags = hctx->tags;
bool reserved = iter_data->reserved;
struct request *rq;
+ bool res = true;
+ int idx;
if (!reserved)
bitnr += tags->nr_reserved_tags;
- rq = tags->rqs[bitnr];
+
+ idx = srcu_read_lock(&blk_sched_srcu);
+ rq = srcu_dereference(tags->rqs[bitnr], &blk_sched_srcu);
/*
* We can hit rq == NULL here, because the tagging functions
* test and set the bit before assigning ->rqs[].
*/
if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx)
- return iter_data->fn(hctx, rq, iter_data->data, reserved);
- return true;
+ res = iter_data->fn(hctx, rq, iter_data->data, reserved);
+ srcu_read_unlock(&blk_sched_srcu, idx);
+
+ return res;
}
/**
@@ -264,10 +270,13 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
struct blk_mq_tags *tags = iter_data->tags;
bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
struct request *rq;
+ bool res = true;
+ int idx;
if (!reserved)
bitnr += tags->nr_reserved_tags;
+ idx = srcu_read_lock(&blk_sched_srcu);
/*
* We can hit rq == NULL here, because the tagging functions
* test and set the bit before assigning ->rqs[].
@@ -275,13 +284,13 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
rq = tags->static_rqs[bitnr];
else
- rq = tags->rqs[bitnr];
- if (!rq)
- return true;
- if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
- !blk_mq_request_started(rq))
- return true;
- return iter_data->fn(rq, iter_data->data, reserved);
+ rq = srcu_dereference(tags->rqs[bitnr], &blk_sched_srcu);
+ if (rq && (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
+ blk_mq_request_started(rq)))
+ res = iter_data->fn(rq, iter_data->data, reserved);
+ srcu_read_unlock(&blk_sched_srcu, idx);
+
+ return res;
}
/**
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 7d3e6b333a4a..7a6d04733261 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -17,7 +17,7 @@ struct blk_mq_tags {
struct sbitmap_queue __bitmap_tags;
struct sbitmap_queue __breserved_tags;
- struct request **rqs;
+ struct request __rcu **rqs;
struct request **static_rqs;
struct list_head page_list;
};
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d4d7c1caa439..88f23a02c7c3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -495,8 +495,11 @@ static void __blk_mq_free_request(struct request *rq)
blk_crypto_free_request(rq);
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
- if (rq->tag != BLK_MQ_NO_TAG)
+ if (rq->tag != BLK_MQ_NO_TAG) {
+ /* Clear ->rqs[] before the tag can be reallocated. */
+ rcu_assign_pointer(hctx->tags->rqs[rq->tag], NULL);
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
+ }
if (sched_tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
blk_mq_sched_restart(hctx);
@@ -838,9 +840,20 @@ EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
+ struct request *rq;
+
if (tag < tags->nr_tags) {
- prefetch(tags->rqs[tag]);
- return tags->rqs[tag];
+ /*
+ * The srcu dereference below is protected by the request
+ * queue usage count. We can only verify that usage count after
+ * having read the request pointer.
+ */
+ rq = srcu_dereference_check(tags->rqs[tag], &blk_sched_srcu,
+ true);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && rq &&
+ percpu_ref_is_zero(&rq->q->q_usage_counter));
+ prefetch(rq);
+ return rq;
}
return NULL;
@@ -1111,7 +1124,7 @@ static bool blk_mq_get_driver_tag(struct request *rq)
rq->rq_flags |= RQF_MQ_INFLIGHT;
__blk_mq_inc_active_requests(hctx);
}
- hctx->tags->rqs[rq->tag] = rq;
+ rcu_assign_pointer(hctx->tags->rqs[rq->tag], rq);
return true;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 3616453ca28c..9ccb1818303b 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -226,6 +226,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
+ rcu_assign_pointer(hctx->tags->rqs[rq->tag], NULL);
blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
rq->tag = BLK_MQ_NO_TAG;
if (rq->rq_flags & RQF_MQ_INFLIGHT) {
diff --git a/block/blk.h b/block/blk.h
index 3b53e44b967e..28c574cabb91 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -14,6 +14,8 @@
/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT (5 * HZ)
+extern struct srcu_struct blk_sched_srcu;
+
extern struct dentry *blk_debugfs_root;
struct blk_flush_queue {
@@ -203,6 +205,11 @@ static inline void elevator_exit(struct request_queue *q,
{
lockdep_assert_held(&q->sysfs_lock);
+ /*
+ * Wait for all in-flight SRCU readers of hctx->tags->rqs[] to finish
+ * before the scheduler requests are freed.
+ */
+ synchronize_srcu(&blk_sched_srcu);
blk_mq_sched_free_requests(q);
__elevator_exit(q, e);
}
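A standalone sketch of the pattern the patch relies on (illustration only,
not part of the patch; demo_srcu, demo_slots, demo_visit and
demo_clear_and_free are made-up names). A reader dereferences an __rcu slot
inside an srcu_read_lock() section, as bt_iter() does above; the free path
clears the slot and waits for readers before freeing, as blk_cleanup_queue()
and elevator_exit() do. synchronize_srcu() is what waits for in-flight
readers; srcu_barrier() only waits for pending call_srcu() callbacks, and
this patch queues none.

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/srcu.h>

DEFINE_SRCU(demo_srcu);

#define DEMO_NR_SLOTS 16
static void __rcu *demo_slots[DEMO_NR_SLOTS];

/* Reader side: mirrors bt_iter(). fn() runs with the slot pinned. */
static bool demo_visit(unsigned int i, bool (*fn)(void *))
{
	bool res = true;
	void *obj;
	int idx;

	idx = srcu_read_lock(&demo_srcu);
	obj = srcu_dereference(demo_slots[i], &demo_srcu);
	if (obj)
		res = fn(obj);	/* obj cannot be freed before the unlock */
	srcu_read_unlock(&demo_srcu, idx);

	return res;
}

/* Free side: mirrors __blk_mq_free_request() and elevator_exit(). */
static void demo_clear_and_free(unsigned int i)
{
	/* Callers must serialize frees of the same slot. */
	void *obj = rcu_dereference_protected(demo_slots[i], true);

	rcu_assign_pointer(demo_slots[i], NULL);
	/* Wait until no reader can still hold a pointer to obj. */
	synchronize_srcu(&demo_srcu);
	kfree(obj);
}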