On 30/11/2020 07:04, Hannes Reinecke wrote:
On 11/26/20 5:49 PM, John Garry wrote:
On 26/11/2020 16:27, Bart Van Assche wrote:
On 11/26/20 7:02 AM, Pradeep P V K wrote:
The below crash is observed while accessing (use-after-free) the request
queue member of struct request.
191.784789: <2> Unable to handle kernel paging request at virtual address ffffff81429a4440
...
191.786174: <2> CPU: 3 PID: 213 Comm: kworker/3:1H Tainted: G S O 5.4.61-qgki-debug-ge45de39 #1
...
191.786226: <2> Workqueue: kblockd blk_mq_timeout_work
191.786242: <2> pstate: 20c00005 (nzCv daif +PAN +UAO)
191.786261: <2> pc : bt_for_each+0x114/0x1a4
191.786274: <2> lr : bt_for_each+0xe0/0x1a4
...
191.786494: <2> Call trace:
191.786507: <2> bt_for_each+0x114/0x1a4
191.786519: <2> blk_mq_queue_tag_busy_iter+0x60/0xd4
191.786532: <2> blk_mq_timeout_work+0x54/0xe8
191.786549: <2> process_one_work+0x2cc/0x568
191.786562: <2> worker_thread+0x28c/0x518
191.786577: <2> kthread+0x160/0x170
191.786594: <2> ret_from_fork+0x10/0x18
191.786615: <2> Code: 0b080148 f9404929 f8685921 b4fffe01 (f9400028)
191.786630: <2> ---[ end trace 0f1f51d79ab3f955 ]---
191.786643: <2> Kernel panic - not syncing: Fatal exception
Fix this by clearing the freed request's entry in tags->rqs[].
This avoids accessing an already-freed request from other
contexts while iterating over the requests.
Signed-off-by: Pradeep P V K <ppvk@xxxxxxxxxxxxxx>
---
block/blk-mq.c | 1 +
block/blk-mq.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 55bcee5..9996cb1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -492,6 +492,7 @@ static void __blk_mq_free_request(struct request *rq)
blk_crypto_free_request(rq);
blk_pm_mark_last_busy(rq);
+ hctx->tags->rqs[rq->tag] = NULL;
rq->mq_hctx = NULL;
if (rq->tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index a52703c..8747bf1 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -224,6 +224,7 @@ static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
+ hctx->tags->rqs[rq->tag] = NULL;
blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
rq->tag = BLK_MQ_NO_TAG;
Is this perhaps a block driver bug instead of a block layer core bug? If
this were a block layer core bug, it would have been reported
before.
Isn't this the same issue that has been reported many times:
https://lore.kernel.org/linux-block/20200820180335.3109216-1-ming.lei@xxxxxxxxxx/
https://lore.kernel.org/linux-block/8376443a-ec1b-0cef-8244-ed584b96fa96@xxxxxxxxxx/
But I never saw a crash, just a KASAN report.
And if the above were a concern, I would have thought one would need to
use WRITE_ONCE() here; otherwise we might have a race condition where
other CPUs still see the old value, no?
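As a side note, here is a minimal userspace sketch of that concern (an
illustrative model only, not the actual blk-mq code: C11 relaxed atomics
stand in for WRITE_ONCE()/READ_ONCE(), and all names are made up). One
thread walks an rqs[] array, much as blk_mq_timeout_work() does, while
another context clears a slot:

/* Sketch only: relaxed atomics model WRITE_ONCE()/READ_ONCE(). */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_TAGS 4

struct request { int tag; };

/* Stand-in for tags->rqs[], shared between freer and iterator. */
static _Atomic(struct request *) rqs[NR_TAGS];

static void *iterate_busy_tags(void *arg)
{
	for (int i = 0; i < NR_TAGS; i++) {
		/* READ_ONCE() analogue: one non-torn load per slot */
		struct request *rq =
			atomic_load_explicit(&rqs[i], memory_order_relaxed);

		if (rq)
			printf("tag %d busy\n", rq->tag);
	}
	return NULL;
}

int main(void)
{
	struct request r = { .tag = 2 };
	pthread_t t;

	atomic_store_explicit(&rqs[2], &r, memory_order_relaxed);
	pthread_create(&t, NULL, iterate_busy_tags, NULL);

	/* WRITE_ONCE() analogue in the freeing path: clear the slot */
	atomic_store_explicit(&rqs[2], NULL, memory_order_relaxed);

	pthread_join(t, NULL);
	return 0;
}

A plain store would let the compiler tear or fuse the access; the relaxed
atomic, like WRITE_ONCE(), at least guarantees the iterator observes
either the old pointer or NULL, never a half-written value.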
Looking further at the history here, Kashyap tried the same
approach, and the preference was not to add anything to the fast path:
https://lore.kernel.org/linux-block/04e2f9e8-79fa-f1cb-ab23-4a15bf3f64cc@xxxxxxxxx/
I had another solution for this, which clears any references when we free
the sched tags, but I never posted it as it looked a bit crazy and I was
busy; see below.
---->8------
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a19cdf159b75..240804617683 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -608,7 +608,7 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
unsigned int hctx_idx)
{
if (hctx->sched_tags) {
- blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
+ blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx, NULL);
blk_mq_free_rq_map(hctx->sched_tags);
hctx->sched_tags = NULL;
}
@@ -711,10 +711,9 @@ void blk_mq_sched_free_requests(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
- if (hctx->sched_tags)
- blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
+ blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i, hctx->tags);
}
}
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 32d82e23b095..9622cef0c38d 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -515,7 +515,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
return -ENOMEM;
}
- blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
+ blk_mq_free_rqs(set, *tagsptr, hctx->queue_num, NULL);
blk_mq_free_rq_map(*tagsptr);
*tagsptr = new;
} else {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0015a1892153..6ff815ceae34 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2256,12 +2256,12 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
EXPORT_SYMBOL_GPL(blk_mq_submit_bio); /* only for request based dm */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
- unsigned int hctx_idx)
+ unsigned int hctx_idx, struct blk_mq_tags *references)
{
struct page *page;
if (tags->rqs && set->ops->exit_request) {
- int i;
+ int i, j;
for (i = 0; i < tags->nr_tags; i++) {
struct request *rq = tags->static_rqs[i];
@@ -2269,6 +2269,12 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
if (!rq)
continue;
set->ops->exit_request(set, rq, hctx_idx);
+ for (j = 0; references && j < references->nr_tags; j++)
+ cmpxchg(&references->rqs[j], rq, NULL);
tags->static_rqs[i] = NULL;
}
}
@@ -2425,7 +2431,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
return 0;
fail:
- blk_mq_free_rqs(set, tags, hctx_idx);
+ blk_mq_free_rqs(set, tags, hctx_idx, NULL);
return -ENOMEM;
}
@@ -2755,7 +2761,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
unsigned int hctx_idx)
{
if (set->tags && set->tags[hctx_idx]) {
- blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
+ blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx, NULL);
blk_mq_free_rq_map(set->tags[hctx_idx]);
set->tags[hctx_idx] = NULL;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 863a2f3346d4..bee8c5de600b 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -52,7 +52,7 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
* Internal helpers for allocating/freeing the request map
*/
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
- unsigned int hctx_idx);
+ unsigned int hctx_idx, struct blk_mq_tags *references);
void blk_mq_free_rq_map(struct blk_mq_tags *tags);
struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx,
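For what it's worth, the cmpxchg() idea in the draft above can be
illustrated with a standalone sketch (hypothetical names throughout;
GCC's __sync_val_compare_and_swap() stands in for the kernel's
cmpxchg()). Each slot in the reference array still pointing at the
request being freed is atomically swapped to NULL, while slots holding
other requests are left untouched:

/* Sketch only: __sync_val_compare_and_swap() models kernel cmpxchg(). */
#include <stddef.h>
#include <stdio.h>

struct request { int tag; };

/* Atomically clear every stale reference to 'victim' in refs[]. */
static void clear_stale_refs(struct request **refs, int nr,
			     struct request *victim)
{
	for (int i = 0; i < nr; i++)
		__sync_val_compare_and_swap(&refs[i], victim, NULL);
}

int main(void)
{
	struct request a = { .tag = 0 }, b = { .tag = 1 };
	struct request *refs[3] = { &a, &b, &a };

	clear_stale_refs(refs, 3, &a);

	/* Slots 0 and 2 (stale refs to 'a') are now NULL; slot 1 keeps b. */
	for (int i = 0; i < 3; i++)
		printf("slot %d: %p\n", i, (void *)refs[i]);
	return 0;
}

The compare-and-swap matters because the iterating side may be reading
the same slot concurrently: a matching slot flips to NULL in one atomic
step, so a racing reader sees either the old pointer or NULL, never a
torn value.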