From: Justin Tee <justin.tee@xxxxxxxxxxxx> [ Upstream commit 2be1d4f11944cd6283cb97268b3e17c4424945ca ] When the HBA is undergoing a reset or is handling an errata event, NULL ptr dereference crashes may occur in routines such as lpfc_sli_flush_io_rings(), lpfc_dev_loss_tmo_callbk(), or lpfc_abort_handler(). Add NULL ptr checks before dereferencing hdwq pointers that may have been freed due to operations colliding with a reset or errata event handler. Signed-off-by: Justin Tee <justin.tee@xxxxxxxxxxxx> Link: https://lore.kernel.org/r/20240726231512.92867-4-justintee8345@xxxxxxxxx Signed-off-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> [Xiangyu: BP to fix CVE: CVE-2024-49891, no test_bit() conflict resolution] Signed-off-by: Xiangyu Chen <xiangyu.chen@xxxxxxxxxxxxx> --- drivers/scsi/lpfc/lpfc_hbadisc.c | 3 ++- drivers/scsi/lpfc/lpfc_scsi.c | 13 +++++++++++-- drivers/scsi/lpfc/lpfc_sli.c | 11 +++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index aaa98a006fdc..d3a5f10b8b83 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -177,7 +177,8 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) /* Don't schedule a worker thread event if the vport is going down. * The teardown process cleans up the node via lpfc_drop_node. */ - if (vport->load_flag & FC_UNLOADING) { + if ((vport->load_flag & FC_UNLOADING) || + !(phba->hba_flag & HBA_SETUP)) { ((struct lpfc_rport_data *)rport->dd_data)->pnode = NULL; ndlp->rport = NULL; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 2a81a42de5c1..ed32aa01c711 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5554,11 +5554,20 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) iocb = &lpfc_cmd->cur_iocbq; if (phba->sli_rev == LPFC_SLI_REV4) { - pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring; - if (!pring_s4) { + /* if the io_wq & pring are gone, the port was reset. */ + if (!phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq || + !phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, + "2877 SCSI Layer I/O Abort Request " + "IO CMPL Status x%x ID %d LUN %llu " + "HBA_SETUP %d\n", FAILED, + cmnd->device->id, + (u64)cmnd->device->lun, + (HBA_SETUP & phba->hba_flag)); ret = FAILED; goto out_unlock_hba; } + pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring; spin_lock(&pring_s4->ring_lock); } /* the command is in process of being cancelled */ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 587e3c2f7c48..1e04b6fc127a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4668,6 +4668,17 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) /* Look on all the FCP Rings for the iotag */ if (phba->sli_rev >= LPFC_SLI_REV4) { for (i = 0; i < phba->cfg_hdw_queue; i++) { + if (!phba->sli4_hba.hdwq || + !phba->sli4_hba.hdwq[i].io_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "7777 hdwq's deleted %lx " + "%lx %x %x\n", + (unsigned long)phba->pport->load_flag, + (unsigned long)phba->hba_flag, + phba->link_state, + phba->sli.sli_flag); + return; + } pring = phba->sli4_hba.hdwq[i].io_wq->pring; spin_lock_irq(&pring->ring_lock); -- 2.25.1