On 10/13/22 12:44, Chao Leng wrote:
All controller namespaces share the same tagset, so we can use this
interface, which performs the optimal parallel quiesce based on the
tagset type (e.g. blocking vs. non-blocking tagsets).
NVME_NS_STOPPED is currently used to ensure that quiesce and unquiesce
are paired. With blk_mq_[un]quiesce_tagset the per-namespace
NVME_NS_STOPPED flag no longer works, so introduce a per-controller
NVME_CTRL_STOPPED flag to replace it.
The nvme connect_q should never be quiesced, so set
QUEUE_FLAG_NOQUIESCED when initializing connect_q.
Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx>
Signed-off-by: Chao Leng <lengchao@xxxxxxxxxx>
---
drivers/nvme/host/core.c | 42 +++++++++++++-----------------------------
drivers/nvme/host/nvme.h | 2 +-
2 files changed, 14 insertions(+), 30 deletions(-)
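The pairing described in the commit message would look roughly like the
sketch below (illustrative only: it assumes the blk_mq_[un]quiesce_tagset
helpers and the tagset-aware blk_mq_wait_quiesce_done() added earlier in
the series, which are not part of this excerpt):
--
static void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
	/* Quiesce the whole tagset once; the flag keeps stop/start paired. */
	if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags))
		blk_mq_quiesce_tagset(ctrl->tagset);
	else
		blk_mq_wait_quiesce_done(ctrl->tagset);
}

static void nvme_start_queues(struct nvme_ctrl *ctrl)
{
	/* Only unquiesce if a matching stop actually quiesced the tagset. */
	if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
		blk_mq_unquiesce_tagset(ctrl->tagset);
}
--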
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 059737c1a2c1..0d07a07e02ff 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4890,6 +4890,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
}
+ blk_queue_flag_set(QUEUE_FLAG_NOQUIESCED, ctrl->connect_q);
}
ctrl->tagset = set;
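For reference, a tagset-wide quiesce could honor QUEUE_FLAG_NOQUIESCED
along these lines (a sketch only, assuming an illustrative
blk_queue_noquiesced() accessor for the flag; the block-layer patch in
the series may spell this differently):
--
void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set)
{
	struct request_queue *q;

	mutex_lock(&set->tag_list_lock);
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		/* connect_q sets QUEUE_FLAG_NOQUIESCED and is skipped */
		if (!blk_queue_noquiesced(q))
			blk_mq_quiesce_queue_nowait(q);
	}
	/* a single wait covers every quiesced queue in the tagset */
	blk_mq_wait_quiesce_done(set);
	mutex_unlock(&set->tag_list_lock);
}
--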
@@ -5089,20 +5090,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
-static void nvme_start_ns_queue(struct nvme_ns *ns)
-{
- if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags))
- blk_mq_unquiesce_queue(ns->queue);
-}
-
-static void nvme_stop_ns_queue(struct nvme_ns *ns)
-{
- if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags))
- blk_mq_quiesce_queue(ns->queue);
- else
- blk_mq_wait_quiesce_done(ns->queue);
-}
-
/*
* Prepare a queue for teardown.
*
@@ -5111,13 +5098,14 @@ static void nvme_stop_ns_queue(struct nvme_ns *ns)
* holding bd_butex. This will end buffered writers dirtying pages that can't
* be synced.
*/
-static void nvme_set_queue_dying(struct nvme_ns *ns)
+static void nvme_set_queue_dying(struct nvme_ns *ns, bool start_queue)
{
if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
blk_mark_disk_dead(ns->disk);
- nvme_start_ns_queue(ns);
+ if (start_queue)
+ blk_mq_unquiesce_queue(ns->queue);
set_capacity_and_notify(ns->disk, 0);
}
@@ -5132,6 +5120,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
void nvme_kill_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
+ bool start_queue = false;
down_read(&ctrl->namespaces_rwsem);
@@ -5139,8 +5128,11 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
nvme_start_admin_queue(ctrl);
+ if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+ start_queue = true;
Why can't we just start the queues here? Just call nvme_start_queues()?
Why does it need to happen only after we mark the disk dead?
The documentation only says that we need to set the capacity to 0
after we unquiesce, but it doesn't say that we can't unquiesce earlier.
Something like this:
--
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7e840549a940..27dc393eed97 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -5127,8 +5127,6 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
return;
blk_mark_disk_dead(ns->disk);
- nvme_start_ns_queue(ns);
-
set_capacity_and_notify(ns->disk, 0);
}
@@ -5149,6 +5147,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
nvme_start_admin_queue(ctrl);
+ if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
+ nvme_start_queues(ctrl);
+
list_for_each_entry(ns, &ctrl->namespaces, list)
nvme_set_queue_dying(ns);
--