When a target swap happens, under certain conditions the node sends a LOGO. The unsolicited ELS logic responds with a reject. The logic may allocate a new node to handle this. Afterward, the new nodes are dropped incorrectly leaving them in a mis-matched state and refcounting causes a use-after-free situation leading to a crash. It is also possible that the unsolicited els handling finds a node which is in an UNUSED state. The handling moves these nodes to NPR state with a refcount of 1. Although the end of the discovery logic assumes a final put will free such a node, there are codes paths which could increment the reference count, thus the node is in NPR state and not released. Eventually this mismatch in state and refcount leads to premature release of the node causing a crash. Fix by always using the discovery engine DEVICE RM event to decrement and release the nodes (rather than explicit code that tried to do it before). This will take care of moving the node to the UNUSED state and then removes the final ref count. If there is a trigger to reuse this node, the transition from the UNUSED state clearly indicates that the initial reference is then incremented and use can continue. Co-developed-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx> Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx> Signed-off-by: James Smart <james.smart@xxxxxxxxxxxx> --- drivers/scsi/lpfc/lpfc_els.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 225215a04313..45a8ebbf60bc 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8728,7 +8728,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_els_rcv_flogi(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_LOGO: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8770,7 +8771,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvRSCN++; lpfc_els_rcv_rscn(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_ADISC: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8848,7 +8850,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvLIRR++; lpfc_els_rcv_lirr(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_RLS: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8858,7 +8861,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvRLS++; lpfc_els_rcv_rls(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_RPL: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8868,7 +8872,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvRPL++; lpfc_els_rcv_rpl(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_RNID: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8878,7 +8883,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvRNID++; lpfc_els_rcv_rnid(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_RTV: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8887,7 +8893,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvRTV++; lpfc_els_rcv_rtv(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_RRQ: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8897,7 +8904,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvRRQ++; lpfc_els_rcv_rrq(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_ECHO: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, @@ -8907,7 +8915,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvECHO++; lpfc_els_rcv_echo(vport, elsiocb, ndlp); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; case ELS_CMD_REC: /* receive this due to exchange closed */ @@ -8938,7 +8947,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, "0115 Unknown ELS command x%x " "received from NPORT x%x\n", cmd, did); if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); break; } @@ -8952,7 +8962,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, NULL); /* Remove the reference from above for new nodes. */ if (newnode) - lpfc_nlp_put(ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); } /* Release the reference on this elsiocb, not the ndlp. */ -- 2.26.2
Attachment:
smime.p7s
Description: S/MIME Cryptographic Signature