If we are handling a sg io reset there is a small window where cmds can sneak into iscsi_queuecommand even though libiscsi has sent a TMF to the driver. This does seems to not be a problem for normal drivers because they know exactly what was sent to the FW. For libiscsi this is a problem because it knows what it has sent to the driver but not what the driver sent to the FW. When we go to cleanup the running tasks, libiscsi might cleanup the sneaky cmd but the driver/FQ may not have seen the sneaky cmd and it's left running in FW. This has libiscsi just stop queueing cmds when it sends the TMF down to the driver. Both then know they see the same set of cmds. Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> --- drivers/scsi/libiscsi.c | 104 +++++++++++++++++++++++++++------------- 1 file changed, 72 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 4834219497ee..aa5ceaffc697 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1669,29 +1669,11 @@ enum { FAILURE_SESSION_NOT_READY, }; -int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) +static bool iscsi_eh_running(struct iscsi_conn *conn, struct scsi_cmnd *sc, + int *reason) { - struct iscsi_cls_session *cls_session; - struct iscsi_host *ihost; - int reason = 0; - struct iscsi_session *session; - struct iscsi_conn *conn; - struct iscsi_task *task = NULL; - - sc->result = 0; - sc->SCp.ptr = NULL; - - ihost = shost_priv(host); - - cls_session = starget_to_session(scsi_target(sc->device)); - session = cls_session->dd_data; - spin_lock_bh(&session->frwd_lock); - - reason = iscsi_session_chkready(cls_session); - if (reason) { - sc->result = reason; - goto fault; - } + struct iscsi_session *session = conn->session; + struct iscsi_tm *tmf; if (session->state != ISCSI_STATE_LOGGED_IN) { /* @@ -1707,31 +1689,92 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) * state is bad, allowing completion to happen */ if (unlikely(system_state != SYSTEM_RUNNING)) { - reason = FAILURE_SESSION_FAILED; + *reason = FAILURE_SESSION_FAILED; sc->result = DID_NO_CONNECT << 16; break; } fallthrough; case ISCSI_STATE_IN_RECOVERY: - reason = FAILURE_SESSION_IN_RECOVERY; + *reason = FAILURE_SESSION_IN_RECOVERY; sc->result = DID_IMM_RETRY << 16; break; case ISCSI_STATE_LOGGING_OUT: - reason = FAILURE_SESSION_LOGGING_OUT; + *reason = FAILURE_SESSION_LOGGING_OUT; sc->result = DID_IMM_RETRY << 16; break; case ISCSI_STATE_RECOVERY_FAILED: - reason = FAILURE_SESSION_RECOVERY_TIMEOUT; + *reason = FAILURE_SESSION_RECOVERY_TIMEOUT; sc->result = DID_TRANSPORT_FAILFAST << 16; break; case ISCSI_STATE_TERMINATE: - reason = FAILURE_SESSION_TERMINATE; + *reason = FAILURE_SESSION_TERMINATE; sc->result = DID_NO_CONNECT << 16; break; default: - reason = FAILURE_SESSION_FREED; + *reason = FAILURE_SESSION_FREED; sc->result = DID_NO_CONNECT << 16; } + goto eh_running; + } + + if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + *reason = FAILURE_SESSION_IN_RECOVERY; + sc->result = DID_REQUEUE << 16; + goto eh_running; + } + + /* + * For sg resets make sure libiscsi, the drivers and their fw see the + * same cmds. Once we get a TMF that can affect multiple cmds stop + * queueing. + */ + if (conn->tmf_state != TMF_INITIAL) { + tmf = &conn->tmhdr; + + switch (ISCSI_TM_FUNC_VALUE(tmf)) { + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + if (sc->device->lun != scsilun_to_int(&tmf->lun)) + break; + + ISCSI_DBG_EH(conn->session, + "Requeue cmd sent during LU RESET processing.\n"); + sc->result = DID_REQUEUE << 16; + goto eh_running; + case ISCSI_TM_FUNC_TARGET_WARM_RESET: + ISCSI_DBG_EH(conn->session, + "Requeue cmd sent during TARGET RESET processing.\n"); + sc->result = DID_REQUEUE << 16; + goto eh_running; + } + } + + return false; + +eh_running: + return true; +} + +int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) +{ + struct iscsi_cls_session *cls_session; + struct iscsi_host *ihost; + int reason = 0; + struct iscsi_session *session; + struct iscsi_conn *conn; + struct iscsi_task *task = NULL; + + sc->result = 0; + sc->SCp.ptr = NULL; + + ihost = shost_priv(host); + + cls_session = starget_to_session(scsi_target(sc->device)); + session = cls_session->dd_data; + spin_lock_bh(&session->frwd_lock); + + reason = iscsi_session_chkready(cls_session); + if (reason) { + sc->result = reason; goto fault; } @@ -1742,11 +1785,8 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto fault; } - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { - reason = FAILURE_SESSION_IN_RECOVERY; - sc->result = DID_REQUEUE << 16; + if (iscsi_eh_running(conn, sc, &reason)) goto fault; - } if (iscsi_check_cmdsn_window_closed(conn)) { reason = FAILURE_WINDOW_CLOSED; -- 2.25.1