The lpfc_send_taskmgmt() routine fails to check the IOCB returned by the firmware. This means that all task management functions appear to complete successfully even when they are failing due to device failures or task management errors. This patch corrects this by checking iocb.ulpStatus after the command has completed. Of course, even when the command completes successfully, the firmware sets IOSTAT_FCP_RSP_ERROR. This indicates that the driver needs to verify the return code in the FCP RSP itself. So a new routine, lpfc_check_fcp_rsp(), has been added, which verifies that the RSP has an info field and that the info field indicates success. I've also added a check in the reset handlers to see whether the task management function succeeded before running lpfc_reset_flush_io_context(). In a way this is bad, because it is now possible to actually fall through the mid-layer error handlers into the bus and host reset logic. This behavior itself changed not too long ago, when the io_context calls were added. The lpfc driver would never get past device_reset_handler() because it would _ALWAYS_ return success, even if the io_context failed to abort properly, because the firmware would handshake the flushes. This leads to another set of bugs when there are actually commands hung against the device. I have a partial set of patches to fix that problem too. Trace with successful LU reset: 
[16785.323122] lpfc 0000:10:00.1: 3:(0):0702 Issue FCP_LUN_RESET to TGT 5 LUN 510 rpi xa nlp_flag x80000000 Data: x0 x4 [16785.323329] lpfc 0000:10:00.1: 3:0336 Rsp Ring 0 error: IOCB Data: xff000020 x60 x0 x0 xfe x0 x28208ce x3ca29d12 [16785.323349] lpfc 0000:10:00.1: 3:0331 IOCB wake signaled [16785.323356] lpfc 0000:10:00.1: 3:(0):0727 TMF FCP_LUN_RESET to TGT 5 LUN 510 failed (1, 254) iocb_flag x6 [16785.323359] lpfc 0000:10:00.1: 3:(0):0702XX fcp_rsp valid 0x1, rsp len=8 code 0x0 [16785.323362] lpfc 0000:10:00.1: 3:(0):0702XX Task Mgmt actually OK, cancel error [16785.323366] lpfc 0000:10:00.1: 3:(0):0713 SCSI layer issued Device Reset (5, 510) return x2002 [16785.323562] scsi_reset_provider: waking up host to restart after TMF trace with LS reject Target reset. [16870.975793] lpfc 0000:10:00.1: 3:(0):0702 Issue FCP_TARGET_RESET to TGT 5 LUN 510 rpi xa nlp_flag x80000000 Data: x0 x4 [16870.976043] lpfc 0000:10:00.1: 3:0336 Rsp Ring 0 error: IOCB Data: xff000020 x60 x0 x0 xfe x0 x28408d1 x3ca29d12 [16870.976061] lpfc 0000:10:00.1: 3:0331 IOCB wake signaled [16870.976067] lpfc 0000:10:00.1: 3:(0):0727 TMF FCP_TARGET_RESET to TGT 5 LUN 510 failed (1, 254) iocb_flag x6 [16870.976071] lpfc 0000:10:00.1: 3:(0):0702XX fcp_rsp valid 0x1, rsp len=8 code 0x4 [16870.976074] lpfc 0000:10:00.1: 3:(0):0702XX Target rejected task management [16870.976078] lpfc 0000:10:00.1: 3:(0):0723 SCSI layer issued Target Reset (5, 510) return x2003 trace with bad device failing to respond to target reset. 
[17383.880074] lpfc 0000:10:00.1: 3:(0):0702 Issue FCP_TARGET_RESET to TGT 5 LUN 510 rpi xa nlp_flag x80000000 Data: x0 x6 [17443.116283] lpfc 0000:10:00.1: 3:0336 Rsp Ring 0 error: IOCB Data: x1000020 x60 x0 x0 x2 x0 x2408d3 x10229d32 [17443.116310] lpfc 0000:10:00.1: 3:0331 IOCB wake signaled [17443.116316] lpfc 0000:10:00.1: 3:(0):0727 TMF FCP_TARGET_RESET to TGT 5 LUN 510 failed (3, 2) iocb_flag x6 [17443.116321] lpfc 0000:10:00.1: 3:(0):0723 SCSI layer issued Target Reset (5, 510) return x2003
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 60e5a17..0ff3883 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -4081,6 +4081,9 @@ lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport, if (ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) { piocb->ulpFCP2Rcvy = 1; } + else + piocb->ulpFCP2Rcvy = 0; + piocb->ulpClass = (ndlp->nlp_fcp_info & 0x0f); /* ulpTimeout is only one byte */ @@ -4569,6 +4572,76 @@ lpfc_taskmgmt_name(uint8_t task_mgmt_cmd) } } + +/** + * lpfc_check_fcp_rsp - check the returned fcp_rsp to see if task failed + * @vport: The virtual port for which this call is being executed. + * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure. + * + * This routine checks the FCP RSP INFO to see if the tsk mgmt command succeded + * + * Return code : + * 0x2003 - Error + * 0x2002 - Success + **/ + +static int +lpfc_check_fcp_rsp(struct lpfc_vport *vport,struct lpfc_scsi_buf *lpfc_cmd) +{ + struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp; + uint32_t rsp_info; + uint32_t rsp_len; + uint8_t rsp_info_code; + int ret=FAILED; + + + if (fcprsp==NULL) + { + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, + "0702X fcp_rsp is missing\n"); + } + else + { + rsp_info = fcprsp->rspStatus2; + rsp_len = be32_to_cpu(fcprsp->rspRspLen); + rsp_info_code=fcprsp->rspInfo3; + + + lpfc_printf_vlog(vport, KERN_INFO, + LOG_FCP, + "0702XX fcp_rsp valid 0x%x," + " rsp len=%d code 0x%x\n", + rsp_info, + rsp_len,rsp_info_code); + + if ( (fcprsp->rspStatus2&RSP_LEN_VALID) && (rsp_len==8) ) { + switch (rsp_info_code) { + case RSP_NO_FAILURE: + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, + "0702XX Task Mgmt actually OK," + " cancel error\n"); + ret=SUCCESS; + break; + case RSP_TM_NOT_SUPPORTED: //TM rejected + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, + "0702XX Target rejected task " + "management\n"); + break; + case RSP_TM_NOT_COMPLETED: //TM failed + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, + "0702XX Target failed TM\n"); + break; + case 
RSP_TM_INVALID_LU: //TM to invalid LU! + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, + "0702XX Task Mgmt " + "to invalid LUN\n"); + break; + } + } + } + return ret; +} + /** * lpfc_send_taskmgmt - Generic SCSI Task Mgmt Handler * @vport: The virtual port for which this call is being executed. @@ -4630,12 +4705,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata, status = lpfc_sli_issue_iocb_wait(phba, LPFC_FCP_RING, iocbq, iocbqrsp, lpfc_cmd->timeout); if (status != IOCB_SUCCESS) { - if (status == IOCB_TIMEDOUT) { - iocbq->iocb_cmpl = lpfc_tskmgmt_def_cmpl; - ret = TIMEOUT_ERROR; - } else - ret = FAILED; - lpfc_cmd->status = IOSTAT_DRIVER_REJECT; + lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP, "0727 TMF %s to TGT %d LUN %d failed (%d, %d) " "iocb_flag x%x\n", @@ -4643,9 +4713,24 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata, tgt_id, lun_id, iocbqrsp->iocb.ulpStatus, iocbqrsp->iocb.un.ulpWord[4], iocbq->iocb_flag); - } else if (status == IOCB_BUSY) - ret = FAILED; - else + + if (status == IOCB_TIMEDOUT) { + iocbq->iocb_cmpl = lpfc_tskmgmt_def_cmpl; + ret = TIMEOUT_ERROR; + } else { + if (iocbqrsp->iocb.ulpStatus==IOSTAT_FCP_RSP_ERROR) { + /* the firmware says that we need to + * check the FCP_RSP ourselves */ + ret = lpfc_check_fcp_rsp(vport,lpfc_cmd); + } else { + /* other common returns are IOSTAT_LOCAL_REJECT + * IOERR_SEQUENCE_TIMEOUT, indicating the + * device is not responding */ + ret = FAILED; + } + } + + } else ret = SUCCESS; lpfc_sli_release_iocbq(phba, iocbqrsp); @@ -4797,12 +4882,12 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd) fc_host_post_vendor_event(shost, fc_get_event_number(), sizeof(scsi_event), (char *)&scsi_event, LPFC_NL_VENDOR_ID); - status = lpfc_send_taskmgmt(vport, rdata, tgt_id, lun_id, - FCP_LUN_RESET); + ret = lpfc_send_taskmgmt(vport, rdata, tgt_id, lun_id, + FCP_LUN_RESET); lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP, "0713 SCSI layer issued Device Reset (%d, %d) " - "return 
x%x\n", tgt_id, lun_id, status); + "return x%x\n", tgt_id, lun_id, ret); /* * We have to clean up i/o as : they may be orphaned by the TMF; @@ -4810,8 +4895,12 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd) * So, continue on. * We will report success if all the i/o aborts successfully. */ - ret = lpfc_reset_flush_io_context(vport, tgt_id, lun_id, - LPFC_CTX_LUN); + if (ret==SUCCESS) + { + ret = lpfc_reset_flush_io_context(vport, tgt_id, lun_id, + LPFC_CTX_LUN); + } + return ret; } @@ -4864,12 +4953,12 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd) fc_host_post_vendor_event(shost, fc_get_event_number(), sizeof(scsi_event), (char *)&scsi_event, LPFC_NL_VENDOR_ID); - status = lpfc_send_taskmgmt(vport, rdata, tgt_id, lun_id, + ret = lpfc_send_taskmgmt(vport, rdata, tgt_id, lun_id, FCP_TARGET_RESET); lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP, "0723 SCSI layer issued Target Reset (%d, %d) " - "return x%x\n", tgt_id, lun_id, status); + "return x%x\n", tgt_id, lun_id, ret); /* * We have to clean up i/o as : they may be orphaned by the TMF; @@ -4877,8 +4966,11 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd) * So, continue on. * We will report success if all the i/o aborts successfully. 
*/ - ret = lpfc_reset_flush_io_context(vport, tgt_id, lun_id, - LPFC_CTX_TGT); + if (ret==SUCCESS) + { + ret = lpfc_reset_flush_io_context(vport, tgt_id, lun_id, + LPFC_CTX_TGT); + } return ret; } diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 21a2ffe..dd199c9 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -73,6 +73,7 @@ struct fcp_rsp { #define RSP_RO_MISMATCH_ERR 0x03 #define RSP_TM_NOT_SUPPORTED 0x04 /* Task mgmt function not supported */ #define RSP_TM_NOT_COMPLETED 0x05 /* Task mgmt function not performed */ +#define RSP_TM_INVALID_LU 0x09 /* Task mgmt function to invalid LU */ uint32_t rspInfoRsvd; /* FCP_RSP_INFO bytes 4-7 (reserved) */ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 624eab3..b335533 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -9958,8 +9958,12 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, timeout_req); if (piocb->iocb_flag & LPFC_IO_WAKE) { - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "0331 IOCB wake signaled\n"); + if ((prspiocbq) && (prspiocbq->iocb.ulpStatus)) + { + retval = IOCB_ERROR; + } } else if (timeleft == 0) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0338 IOCB wait timeout error - no " @@ -14415,7 +14419,7 @@ lpfc_prep_seq(struct lpfc_vport *vport, struct hbq_dmabuf *seq_dmabuf) if (!iocbq) { if (first_iocbq) { first_iocbq->iocb.ulpStatus = - IOSTAT_FCP_RSP_ERROR; + IOSTAT_LOCAL_REJECT; first_iocbq->iocb.un.ulpWord[4] = IOERR_NO_RESOURCES; }