On Fri, May 18, 2018 at 10:38:18AM -0600, Keith Busch wrote: > This patch fixes races that occur with simultaneous controller > resets by synchronizing request queues prior to initializing the > controller. Withouth this, a thread may attempt disabling a controller > at the same time as we're trying to enable it. > > Signed-off-by: Keith Busch <keith.busch@xxxxxxxxx> > --- > drivers/nvme/host/core.c | 21 +++++++++++++++++++-- > drivers/nvme/host/nvme.h | 1 + > drivers/nvme/host/pci.c | 1 + > 3 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > index 99b857e5a7a9..1de68b56b318 100644 > --- a/drivers/nvme/host/core.c > +++ b/drivers/nvme/host/core.c > @@ -3471,6 +3471,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, > } > EXPORT_SYMBOL_GPL(nvme_init_ctrl); > > +static void nvme_start_queue(struct nvme_ns *ns) > +{ > + blk_mq_unquiesce_queue(ns->queue); > + blk_mq_kick_requeue_list(ns->queue); > +} > + > /** > * nvme_kill_queues(): Ends all namespace queues > * @ctrl: the dead controller that needs to end > @@ -3499,7 +3505,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) > blk_set_queue_dying(ns->queue); > > /* Forcibly unquiesce queues to avoid blocking dispatch */ > - blk_mq_unquiesce_queue(ns->queue); > + nvme_start_queue(ns); > } > up_read(&ctrl->namespaces_rwsem); > } > @@ -3569,11 +3575,22 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) > > down_read(&ctrl->namespaces_rwsem); > list_for_each_entry(ns, &ctrl->namespaces, list) > - blk_mq_unquiesce_queue(ns->queue); > + nvme_start_queue(ns); > up_read(&ctrl->namespaces_rwsem); > } > EXPORT_SYMBOL_GPL(nvme_start_queues); > > +void nvme_sync_queues(struct nvme_ctrl *ctrl) > +{ > + struct nvme_ns *ns; > + > + down_read(&ctrl->namespaces_rwsem); > + list_for_each_entry(ns, &ctrl->namespaces, list) > + blk_sync_queue(ns->queue); > + up_read(&ctrl->namespaces_rwsem); > +} > +EXPORT_SYMBOL_GPL(nvme_sync_queues); This way can't 
sync timeout handling reliably, since timeout events can come from two namespaces at the same time, and one may be handled as RESET_TIMER while the other is handled as EH_HANDLED. Thanks, Ming