When a PLOGI/ADISC/PRLI/REG_RPI fails, the node remains in the nodelist in that state. Although the driver now frees a node when the ref count goes to zero, in this case the ref cnt doesn't reach zero because there isn't a mechanism to release the final reference. Discovery just stops. Fix by calling the node discovery state machine DEVICE_RM event whenever one of these commands fail. This will remove the final reference count and trigger node release. Co-developed-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx> Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx> Signed-off-by: James Smart <james.smart@xxxxxxxxxxxx> --- drivers/scsi/lpfc/lpfc_els.c | 66 +++++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_hbadisc.c | 39 +++++++++++-------- 2 files changed, 85 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 565ab29abf3c..225215a04313 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1988,12 +1988,25 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "2753 PLOGI failure DID:%06X Status:x%x/x%x\n", ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4]); - /* Do not call DSM for lpfc_els_abort'ed ELS cmds. Just execute - * the final node put to free it to the pool. + + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ + if (lpfc_error_lost_link(irsp)) + goto check_plogi; + else + lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_PLOGI); + + /* As long as this node is not registered with the scsi or nvme + * transport, it is no longer an active node. Otherwise + * devloss handles the final cleanup. */ - if (!lpfc_error_lost_link(irsp)) + if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) { + spin_lock_irq(&ndlp->lock); + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + spin_unlock_irq(&ndlp->lock); lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_DEVICE_RM); + } } else { /* Good status, call state machine */ prsp = list_entry(((struct lpfc_dmabuf *) @@ -2004,6 +2017,7 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, NLP_EVT_CMPL_PLOGI); } + check_plogi: if (disc && vport->num_disc_nodes) { /* Check to see if there are more PLOGIs to be sent */ lpfc_more_plogi(vport); @@ -2243,6 +2257,20 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, else lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); + + /* As long as this node is not registered with the scsi + * or nvme transport and no other PRLIs are outstanding, + * it is no longer an active node. Otherwise devloss + * handles the final cleanup. + */ + if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD)) && + !ndlp->fc4_prli_sent) { + spin_lock_irq(&ndlp->lock); + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + spin_unlock_irq(&ndlp->lock); + lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_DEVICE_RM); + } } else { /* Good status, call state machine. However, if another * PRLI is outstanding, don't call the state machine @@ -2655,14 +2683,29 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4]); /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ - if (!lpfc_error_lost_link(irsp)) + if (lpfc_error_lost_link(irsp)) + goto check_adisc; + else lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_ADISC); + + /* As long as this node is not registered with the scsi or nvme + * transport, it is no longer an active node. Otherwise + * devloss handles the final cleanup. + */ + if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) { + spin_lock_irq(&ndlp->lock); + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + spin_unlock_irq(&ndlp->lock); + lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_DEVICE_RM); + } } else /* Good status, call state machine */ lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_ADISC); + check_adisc: /* Check to see if there are more ADISCs to be sent */ if (disc && vport->num_disc_nodes) lpfc_more_adisc(vport); @@ -2879,8 +2922,21 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp->un.ulpWord[4], irsp->ulpTimeout, vport->num_disc_nodes); lpfc_disc_start(vport); + return; + } + + /* Cleanup path for failed REG_RPI handling. If REG_RPI fails, the + * driver sends a LOGO to the rport to cleanup. For fabric and + * initiator ports cleanup the node as long as it the node is not + * register with the transport. + */ + if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) { + spin_lock_irq(&ndlp->lock); + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + spin_unlock_irq(&ndlp->lock); + lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_DEVICE_RM); } - return; } /** diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 8f3106aadfc7..ac05d188f816 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -204,10 +204,13 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) spin_lock_irqsave(&ndlp->lock, iflags); ndlp->nlp_flag |= NLP_IN_DEV_LOSS; + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + /* - * The backend does not expect any more calls assoicated with this - * rport, Remove the association between rport and ndlp + * The backend does not expect any more calls associated with this + * rport. Remove the association between rport and ndlp. */ + ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD; ((struct lpfc_rport_data *)rport->dd_data)->pnode = NULL; ndlp->rport = NULL; spin_unlock_irqrestore(&ndlp->lock, iflags); @@ -248,7 +251,6 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) int warn_on = 0; int fcf_inuse = 0; unsigned long iflags; - u32 fc4_xpt_flags; vport = ndlp->vport; shost = lpfc_shost_from_vport(vport); @@ -267,10 +269,11 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) ndlp->nlp_DID, ndlp->nlp_type, ndlp->nlp_sid); lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, - "3182 %s x%06x, nflag x%x xflags x%x\n", + "3182 %s x%06x, nflag x%x xflags x%x refcnt %d\n", __func__, ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->fc4_xpt_flags); + ndlp->fc4_xpt_flags, kref_read(&ndlp->kref)); + /* If the driver is recovering the rport, ignore devloss. */ if (ndlp->nlp_state == NLP_STE_MAPPED_NODE) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0284 Devloss timeout Ignored on " @@ -282,8 +285,11 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) return fcf_inuse; } - if (ndlp->nlp_type & NLP_FABRIC) + /* Fabric nodes are done. */ + if (ndlp->nlp_type & NLP_FABRIC) { + lpfc_nlp_put(ndlp); return fcf_inuse; + } if (ndlp->nlp_sid != NLP_NO_SID) { warn_on = 1; @@ -311,12 +317,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) ndlp->nlp_state, ndlp->nlp_rpi); } - /* Should be final reference removal triggering a node free. */ - spin_lock_irqsave(shost->host_lock, iflags); - fc4_xpt_flags = ndlp->fc4_xpt_flags; - spin_unlock_irqrestore(shost->host_lock, iflags); - - if (!(fc4_xpt_flags & (NVME_XPT_REGD | SCSI_XPT_REGD))) + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); return fcf_inuse; @@ -3587,7 +3588,7 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) pmb->ctx_buf = NULL; pmb->ctx_ndlp = NULL; - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI | LOG_NODE | LOG_DISCOVERY, "0002 rpi:%x DID:%x flg:%x %d x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref), @@ -4079,8 +4080,16 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) kfree(mp); mempool_free(pmb, phba->mbox_mem_pool); - /* If no other thread is using the ndlp, free it */ - lpfc_nlp_not_used(ndlp); + /* If the node is not registered with the scsi or nvme + * transport, remove the fabric node. The failed reg_login + * is terminal. + */ + if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) { + spin_lock_irq(&ndlp->lock); + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + spin_unlock_irq(&ndlp->lock); + lpfc_nlp_not_used(ndlp); + } if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) { /* -- 2.26.2
Attachment:
smime.p7s
Description: S/MIME Cryptographic Signature