Turns out the current way can't drain timeouts completely, because mod_timer() can be triggered in the work function, which may be running inside the timeout work being synced: del_timer_sync(&q->timeout); cancel_work_sync(&q->timeout_work); This patch introduces a 'timeout_off' flag to fix this issue; it turns out this simple approach does work. Also, blk_quiesce_timeout() and blk_unquiesce_timeout() are introduced for draining timeouts, which is needed by NVMe. Cc: Bart Van Assche <bart.vanassche@xxxxxxx> Cc: Jianchao Wang <jianchao.w.wang@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Sagi Grimberg <sagi@xxxxxxxxxxx> Cc: linux-nvme@xxxxxxxxxxxxxxxxxxx Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-core.c | 34 ++++++++++++++++++++++++++++++++-- block/blk-mq.c | 9 +++++++++ block/blk-timeout.c | 5 ++++- include/linux/blkdev.h | 4 ++++ 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 806ce2442819..1288a0673ed1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -388,6 +388,35 @@ void blk_stop_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_stop_queue); +static void __blk_unquiesce_timeout(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + q->timeout_off = false; + spin_unlock_irqrestore(q->queue_lock, flags); +} + +void blk_unquiesce_timeout(struct request_queue *q) +{ + __blk_unquiesce_timeout(q); + mod_timer(&q->timeout, jiffies + q->rq_timeout); +} +EXPORT_SYMBOL(blk_unquiesce_timeout); + +void blk_quiesce_timeout(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + q->timeout_off = true; + spin_unlock_irqrestore(q->queue_lock, flags); + + del_timer_sync(&q->timeout); + cancel_work_sync(&q->timeout_work); +} +EXPORT_SYMBOL(blk_quiesce_timeout); + /** * blk_sync_queue - cancel any pending callbacks on a queue * @q: the queue @@ -408,8 +437,7 @@ EXPORT_SYMBOL(blk_stop_queue); */ void blk_sync_queue(struct 
request_queue *q) { - del_timer_sync(&q->timeout); - cancel_work_sync(&q->timeout_work); + blk_quiesce_timeout(q); if (q->mq_ops) { struct blk_mq_hw_ctx *hctx; @@ -421,6 +449,8 @@ void blk_sync_queue(struct request_queue *q) } else { cancel_delayed_work_sync(&q->delay_work); } + + __blk_unquiesce_timeout(q); } EXPORT_SYMBOL(blk_sync_queue); diff --git a/block/blk-mq.c b/block/blk-mq.c index 0dc9e341c2a7..890dd3b3e3e1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -901,6 +901,15 @@ static void blk_mq_timeout_work(struct work_struct *work) }; struct blk_mq_hw_ctx *hctx; int i; + bool timeout_off; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + timeout_off = q->timeout_off; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (timeout_off) + return; /* A deadlock might occur if a request is stuck requiring a * timeout at the same time a queue freeze is waiting diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 652d4d4d3e97..ffd0b609091e 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -136,12 +136,15 @@ void blk_timeout_work(struct work_struct *work) spin_lock_irqsave(q->queue_lock, flags); + if (q->timeout_off) + goto exit; + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) blk_rq_check_expired(rq, &next, &next_set); if (next_set) mod_timer(&q->timeout, round_jiffies_up(next)); - +exit: spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9af3e0f430bc..99488a5c7d34 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -584,6 +584,7 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; struct list_head timeout_list; + bool timeout_off; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP @@ -1011,6 +1012,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); 
+extern void blk_quiesce_timeout(struct request_queue *q); +extern void blk_unquiesce_timeout(struct request_queue *q); + int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -- 2.9.5