Re: [PATCH v3 1/2] blk-mq: add async quiesce interface

Chao Leng <lengchao@xxxxxxxxxx> · Mon, 27 Jul 2020 11:33:43 +0800

On 2020/7/27 10:08, Ming Lei wrote:
>> It is at the end and contains exactly what is needed to synchronize. Not
> The sync is simply single global synchronize_rcu(), and why bother to add
> extra >=40bytes for each hctx.

>> sure what you mean by reuse hctx->srcu?
> You already reuses hctx->srcu, but not see reason to add extra rcu_synchronize
> to each hctx for just simulating one single synchronize_rcu().

To synchronize the SRCUs together, the extra bytes are needed; separating
blocking and non-blocking queues into two hctx may not be a good choice.

There are two choices: the struct rcu_synchronize is added either in hctx
or in srcu. Although adding rcu_synchronize to srcu has a weakness (the extra
bytes are wasted for users that do not need to batch-sync SRCU), I still
think it's better for SRCU to provide the batch synchronization interface.

We can check ctrl->tagset->flags to provide the same interface for both
blocking and non-blocking queues. The code for TINY_SRCU:

---
 block/blk-mq.c           | 29 +++++++++++++++++++++++++++++
 drivers/nvme/host/core.c |  9 ++++++++-
 include/linux/blk-mq.h   |  2 ++
 include/linux/srcu.h     |  2 ++
 include/linux/srcutiny.h |  1 +
 kernel/rcu/srcutiny.c    | 16 ++++++++++++++++
 6 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e0d173beaa3..3117fc3082ff 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -235,6 +235,35 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);

+void blk_mq_quiesce_queue_async(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	blk_mq_quiesce_queue_nowait(q);
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (hctx->flags & BLK_MQ_F_BLOCKING)
+			synchronize_srcu_async(hctx->srcu);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_async);
+
+void blk_mq_quiesce_queue_async_wait(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	if (q == NULL) {
+		synchronize_rcu();
+		return;
+	}
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (hctx->flags & BLK_MQ_F_BLOCKING)
+			synchronize_srcu_async_wait(hctx->srcu);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_async_wait);
+
 /*
  * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
  * @q: request queue.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a3b1157561f5..f13aa447ab64 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4322,7 +4322,14 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)

 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
-		blk_mq_quiesce_queue(ns->queue);
+		blk_mq_quiesce_queue_async(ns->queue);
+
+	if (ctrl->tagset->flags & BLK_MQ_F_BLOCKING) {
+		list_for_each_entry(ns, &ctrl->namespaces, list)
+			blk_mq_quiesce_queue_async_wait(ns->queue);
+	} else {
+		blk_mq_quiesce_queue_async_wait(NULL);
+	}
 	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_queues);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d6fcae17da5a..092470c63558 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -515,6 +515,8 @@ void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_quiesce_queue_async(struct request_queue *q);
+void blk_mq_quiesce_queue_async_wait(struct request_queue *q);
 void blk_mq_unquiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index e432cc92c73d..7e006e51ccf9 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -60,6 +60,8 @@ void cleanup_srcu_struct(struct srcu_struct *ssp);
 int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
 void synchronize_srcu(struct srcu_struct *ssp);
+void synchronize_srcu_async(struct srcu_struct *ssp);
+void synchronize_srcu_async_wait(struct srcu_struct *ssp);

 #ifdef CONFIG_DEBUG_LOCK_ALLOC

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 5a5a1941ca15..3d7d871bef61 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -23,6 +23,7 @@ struct srcu_struct {
 	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
 	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
+	struct rcu_synchronize rcu_sync;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 6208c1dae5c9..6e1468175a45 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -190,6 +190,22 @@ void synchronize_srcu(struct srcu_struct *ssp)
 }
 EXPORT_SYMBOL_GPL(synchronize_srcu);

+void synchronize_srcu_async(struct srcu_struct *ssp)
+{
+	init_rcu_head(&ssp->rcu_sync.head);
+	init_completion(&ssp->rcu_sync.completion);
+	call_srcu(ssp, &ssp->rcu_sync.head, wakeme_after_rcu_batch);
+
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_async);
+
+void synchronize_srcu_async_wait(struct srcu_struct *ssp)
+{
+	wait_for_completion(&ssp->rcu_sync.completion);
+	destroy_rcu_head(&ssp->rcu_sync.head);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_async_wait);
+
 /* Lockdep diagnostics.  */
 void __init rcu_scheduler_starting(void)
 {
--
2.16.4