Drivers that have shared tagsets may need to quiesce potentially a lot of request queues that all share a single tagset (e.g. nvme). Add an interface to quiesce all the queues on a given tagset. This interface is useful because it can speed up the quiesce by doing it in parallel. For tagsets that have BLK_MQ_F_BLOCKING set, we use call_srcu on all hctxs in parallel such that all of them wait for the same rcu elapsed period with a per-hctx heap-allocated rcu_synchronize, which is freed once the wait has completed. For tagsets that don't have BLK_MQ_F_BLOCKING set, we simply call a single synchronize_rcu as this is sufficient. Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx> --- block/blk-mq.c | 72 ++++++++++++++++++++++++++++++++++++++++++ include/linux/blk-mq.h | 4 +++ 2 files changed, 76 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index abcf590f6238..c37e37354330 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -209,6 +209,48 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); +/* Quiesce @q and queue one SRCU callback per hctx without waiting for it. */ +static void blk_mq_quiesce_blocking_queue_async(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + blk_mq_quiesce_queue_nowait(q); + + queue_for_each_hw_ctx(q, hctx, i) { + WARN_ON_ONCE(!(hctx->flags & BLK_MQ_F_BLOCKING)); + hctx->rcu_sync = kmalloc(sizeof(*hctx->rcu_sync), GFP_KERNEL); + /* On allocation failure the wait side falls back to synchronize_srcu(). */ + if (!hctx->rcu_sync) + continue; + + init_completion(&hctx->rcu_sync->completion); + init_rcu_head(&hctx->rcu_sync->head); + call_srcu(hctx->srcu, &hctx->rcu_sync->head, + wakeme_after_rcu); + } +} + +/* Wait for the callbacks queued by blk_mq_quiesce_blocking_queue_async(). */ +static void blk_mq_quiesce_blocking_queue_async_wait(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + queue_for_each_hw_ctx(q, hctx, i) { + WARN_ON_ONCE(!(hctx->flags & BLK_MQ_F_BLOCKING)); + if (!hctx->rcu_sync) { + synchronize_srcu(hctx->srcu); + continue; + } + wait_for_completion(&hctx->rcu_sync->completion); + destroy_rcu_head(&hctx->rcu_sync->head); + /* The callback has signalled completion; release the waiter allocated by the async step. */ + kfree(hctx->rcu_sync); + hctx->rcu_sync = NULL; + } +} + /** * blk_mq_quiesce_queue() - wait until all ongoing 
dispatches have finished * @q: request queue. @@ -2884,6 +2920,36 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) } } +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set) +{ + struct request_queue *q; + + mutex_lock(&set->tag_list_lock); + if (set->flags & BLK_MQ_F_BLOCKING) { + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_quiesce_blocking_queue_async(q); + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_quiesce_blocking_queue_async_wait(q); + } else { + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_quiesce_queue_nowait(q); + synchronize_rcu(); + } + mutex_unlock(&set->tag_list_lock); +} +EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset); + +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set) +{ + struct request_queue *q; + + mutex_lock(&set->tag_list_lock); + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_unquiesce_queue(q); + mutex_unlock(&set->tag_list_lock); +} +EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset); + static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 23230c1d031e..a85f2dedc947 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include <linux/blkdev.h> #include <linux/sbitmap.h> #include <linux/srcu.h> +#include <linux/rcupdate_wait.h> struct blk_mq_tags; struct blk_flush_queue; @@ -170,6 +171,7 @@ struct blk_mq_hw_ctx { */ struct list_head hctx_list; + struct rcu_synchronize *rcu_sync; /** * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is * blocking (BLK_MQ_F_BLOCKING). 
Must be the last member - see also @@ -532,6 +534,8 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); unsigned int blk_mq_rq_cpu(struct request *rq); -- 2.25.1