On 10/20/22 13:56, Christoph Hellwig wrote:
The NVME_NS_DEAD flag is only set in nvme_set_queue_dying, which is
called in a loop over all namespaces in nvme_kill_queues. Switch it
to a controller flag checked and set outside said loop.
Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
drivers/nvme/host/core.c | 16 +++++++---------
drivers/nvme/host/nvme.h | 2 +-
2 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a74212a4f1a5f..fa7fdb744979c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4330,7 +4330,7 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
{
int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
- if (test_bit(NVME_NS_DEAD, &ns->flags))
+ if (test_bit(NVME_CTRL_NS_DEAD, &ns->ctrl->flags))
goto out;
ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
@@ -4404,7 +4404,8 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
down_write(&ctrl->namespaces_rwsem);
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
- if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
+ if (ns->head->ns_id > nsid ||
+ test_bit(NVME_CTRL_NS_DEAD, &ns->ctrl->flags))
list_move_tail(&ns->list, &rm_list);
}
up_write(&ctrl->namespaces_rwsem);
@@ -5110,9 +5111,6 @@ static void nvme_stop_ns_queue(struct nvme_ns *ns)
*/
static void nvme_set_queue_dying(struct nvme_ns *ns)
{
- if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- return;
-
blk_mark_disk_dead(ns->disk);
nvme_start_ns_queue(ns);
}
@@ -5129,14 +5127,14 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
down_read(&ctrl->namespaces_rwsem);
-
/* Forcibly unquiesce queues to avoid blocking dispatch */
if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
nvme_start_admin_queue(ctrl);
- list_for_each_entry(ns, &ctrl->namespaces, list)
- nvme_set_queue_dying(ns);
-
+ if (!test_and_set_bit(NVME_CTRL_NS_DEAD, &ctrl->flags)) {
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ nvme_set_queue_dying(ns);
+ }
Looking at it now, I'm not sure I understand the need for this flag. It
seems to make nvme_kill_queues() safe against re-entry, but can't the
admin queue unquiesce still end up unbalanced if the function is re-entered?
How is this not broken today (or ever since quiesce/unquiesce calls started
being accounted)? Maybe I'm missing some context on the subtleties of how
nvme-pci uses this interface...
up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a29877217ee65..82989a3322130 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -237,6 +237,7 @@ enum nvme_ctrl_flags {
NVME_CTRL_FAILFAST_EXPIRED = 0,
NVME_CTRL_ADMIN_Q_STOPPED = 1,
NVME_CTRL_STARTED_ONCE = 2,
+ NVME_CTRL_NS_DEAD = 3,
};
struct nvme_ctrl {
@@ -483,7 +484,6 @@ struct nvme_ns {
unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
-#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
#define NVME_NS_READY 4