---
 block/blk-mq.c         | 45 ++++++++++++++++++++++++++++++++-------------
 include/linux/blkdev.h |  2 ++
2 files changed, 34 insertions(+), 13 deletions(-)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -642,6 +642,8 @@ void blk_mq_complete_request(struct requ
hctx_lock(hctx, &srcu_idx);
if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
__blk_mq_complete_request(rq);
+ else
+ rq->missed_completion = true;
hctx_unlock(hctx, srcu_idx);
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -684,6 +686,7 @@ void blk_mq_start_request(struct request
blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
blk_add_timer(rq);
+ rq->missed_completion = false;
write_seqcount_end(&rq->gstate_seq);
preempt_enable();
@@ -881,7 +884,7 @@ static void blk_mq_check_expired(struct
}
}
-static void blk_mq_timeout_sync_rcu(struct request_queue *q)
+static void blk_mq_timeout_sync_rcu(struct request_queue *q, bool clear)
{
struct blk_mq_hw_ctx *hctx;
bool has_rcu = false;
@@ -896,7 +899,8 @@ static void blk_mq_timeout_sync_rcu(stru
else
synchronize_srcu(hctx->srcu);
- hctx->need_sync_rcu = false;
+ if (clear)
+ hctx->need_sync_rcu = false;
}
if (has_rcu)
synchronize_rcu();
@@ -917,25 +921,37 @@ static void blk_mq_terminate_expired(str
blk_mq_rq_timed_out(hctx, rq, priv, reserved);
}
-static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_timeout_reset_return(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
/*
* @rq's timer reset has gone through rcu synchronization and is
* visible now. Allow normal completions again by resetting
* ->aborted_gstate. Don't clear RQF_MQ_TIMEOUT_RESET here as
- * there's no memory ordering around ->aborted_gstate making it the
- * only field safe to update. Let blk_add_timer() clear it later
- * when the request is recycled or times out again.
- *
- * As nothing prevents from completion happening while
- * ->aborted_gstate is set, this may lead to ignored completions
- * and further spurious timeouts.
+ * blk_mq_timeout_reset_cleanup() needs it again and there's no
+ * memory ordering around ->aborted_gstate making it the only field
+ * safe to update. Let blk_add_timer() clear it later when the
+ * request is recycled or times out again.
*/
if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
blk_mq_rq_update_aborted_gstate(rq, 0);
}
+static void blk_mq_timeout_reset_cleanup(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, void *priv, bool reserved)
+{
+ /*
+ * @rq is now fully returned to the normal path. If normal
+ * completion raced timeout handling, execute the missed completion
+ * here. This is safe because 1. ->missed_completion can no longer
+ * be asserted because nothing is timing out right now and 2. if
+ * ->missed_completion is set, @rq is safe from recycling because
+ * nobody could have completed it.
+ */
+ if ((rq->rq_flags & RQF_MQ_TIMEOUT_RESET) && rq->missed_completion)
+ blk_mq_complete_request(rq);
+}
+
static void blk_mq_timeout_work(struct work_struct *work)
{
struct request_queue *q =
@@ -976,7 +992,7 @@ static void blk_mq_timeout_work(struct w
* becomes a problem, we can add per-hw_ctx rcu_head and
* wait in parallel.
*/
- blk_mq_timeout_sync_rcu(q);
+ blk_mq_timeout_sync_rcu(q, true);
/* terminate the ones we won */
blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired,
@@ -988,9 +1004,12 @@ static void blk_mq_timeout_work(struct w
* reset racing against recycling.
*/
if (nr_resets) {
- blk_mq_timeout_sync_rcu(q);
+ blk_mq_timeout_sync_rcu(q, false);
+ blk_mq_queue_tag_busy_iter(q,
+ blk_mq_timeout_reset_return, NULL);
+ blk_mq_timeout_sync_rcu(q, true);
blk_mq_queue_tag_busy_iter(q,
- blk_mq_finish_timeout_reset, NULL);
+ blk_mq_timeout_reset_cleanup, NULL);
}
}
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -227,6 +227,8 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
+ bool missed_completion;
+