On 4/24/21 3:05 PM, Mike Christie wrote: > If we are handling a sg io reset there is a small window where cmds can > sneak into iscsi_queuecommand even though libiscsi has sent a TMF to the > driver. This doesn't seem to be a problem for normal drivers because they > know exactly what was sent to the FW. For libiscsi this is a problem > because it knows what it has sent to the driver but not what the driver > sent to the FW. When we go to cleanup the running tasks, libiscsi might > cleanup the sneaky cmd but the driver/FW may not have seen the sneaky cmd > and it's left running in FW. > > This has libiscsi just stop queueing cmds when it sends the TMF down to the > driver. Both then know they see the same set of cmds. > > Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> > --- > drivers/scsi/libiscsi.c | 106 ++++++++++++++++++++++++++++------------ > 1 file changed, 74 insertions(+), 32 deletions(-) > > diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c > index 4834219497ee..80305b70298d 100644 > --- a/drivers/scsi/libiscsi.c > +++ b/drivers/scsi/libiscsi.c > @@ -1669,29 +1669,11 @@ enum { > FAILURE_SESSION_NOT_READY, > }; > > -int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) > +static bool iscsi_eh_running(struct iscsi_conn *conn, struct scsi_cmnd *sc, > + int *reason) > { > - struct iscsi_cls_session *cls_session; > - struct iscsi_host *ihost; > - int reason = 0; > - struct iscsi_session *session; > - struct iscsi_conn *conn; > - struct iscsi_task *task = NULL; > - > - sc->result = 0; > - sc->SCp.ptr = NULL; > - > - ihost = shost_priv(host); > - > - cls_session = starget_to_session(scsi_target(sc->device)); > - session = cls_session->dd_data; > - spin_lock_bh(&session->frwd_lock); > - > - reason = iscsi_session_chkready(cls_session); > - if (reason) { > - sc->result = reason; > - goto fault; > - } > + struct iscsi_session *session = conn->session; > + struct iscsi_tm *tmf; > > if (session->state != ISCSI_STATE_LOGGED_IN) { > /* > @@ -1707,31 +1689,94 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) > * state is bad, allowing completion to happen > */ > if (unlikely(system_state != SYSTEM_RUNNING)) { > - reason = FAILURE_SESSION_FAILED; > + *reason = FAILURE_SESSION_FAILED; > sc->result = DID_NO_CONNECT << 16; > break; > } > fallthrough; > case ISCSI_STATE_IN_RECOVERY: > - reason = FAILURE_SESSION_IN_RECOVERY; > + *reason = FAILURE_SESSION_IN_RECOVERY; > sc->result = DID_IMM_RETRY << 16; > break; > case ISCSI_STATE_LOGGING_OUT: > - reason = FAILURE_SESSION_LOGGING_OUT; > + *reason = FAILURE_SESSION_LOGGING_OUT; > sc->result = DID_IMM_RETRY << 16; > break; > case ISCSI_STATE_RECOVERY_FAILED: > - reason = FAILURE_SESSION_RECOVERY_TIMEOUT; > + *reason = FAILURE_SESSION_RECOVERY_TIMEOUT; > sc->result = DID_TRANSPORT_FAILFAST << 16; > break; > case ISCSI_STATE_TERMINATE: > - reason = FAILURE_SESSION_TERMINATE; > + *reason = FAILURE_SESSION_TERMINATE; > sc->result = DID_NO_CONNECT << 16; > break; > default: > - reason = FAILURE_SESSION_FREED; > + *reason = FAILURE_SESSION_FREED; > sc->result = DID_NO_CONNECT << 16; > } > + goto eh_running; > + } > + > + if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { > + *reason = FAILURE_SESSION_IN_RECOVERY; > + sc->result = DID_REQUEUE << 16; > + goto eh_running; > + } > + > + /* > + * For sg resets make sure libiscsi, the drivers and their fw see the > + * same cmds. Once we get a TMF that can affect multiple cmds stop > + * queueing. > + */ > + if (conn->tmf_state != TMF_INITIAL) { > + tmf = &conn->tmhdr; > + > + switch (ISCSI_TM_FUNC_VALUE(tmf)) { > + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: > + if (sc->device->lun != scsilun_to_int(&tmf->lun)) > + break; > + > + ISCSI_DBG_EH(conn->session, > + "Requeue cmd sent during LU RESET processing.\n"); > + sc->result = DID_REQUEUE << 16; > + goto eh_running; > + case ISCSI_TM_FUNC_TARGET_WARM_RESET: > + ISCSI_DBG_EH(conn->session, > + "Requeue cmd sent during TARGET RESET processing.\n"); > + sc->result = DID_REQUEUE << 16; > + goto eh_running; > + default: > + break; > + } > + } > + > + return false; > + > +eh_running: > + return true; > +} > + > +int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) > +{ > + struct iscsi_cls_session *cls_session; > + struct iscsi_host *ihost; > + int reason = 0; > + struct iscsi_session *session; > + struct iscsi_conn *conn; > + struct iscsi_task *task = NULL; > + > + sc->result = 0; > + sc->SCp.ptr = NULL; > + > + ihost = shost_priv(host); > + > + cls_session = starget_to_session(scsi_target(sc->device)); > + session = cls_session->dd_data; > + spin_lock_bh(&session->frwd_lock); > + > + reason = iscsi_session_chkready(cls_session); > + if (reason) { > + sc->result = reason; > goto fault; > } > > @@ -1742,11 +1787,8 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) > goto fault; > } > > - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { > - reason = FAILURE_SESSION_IN_RECOVERY; > - sc->result = DID_REQUEUE << 16; > + if (iscsi_eh_running(conn, sc, &reason)) > goto fault; > - } > > if (iscsi_check_cmdsn_window_closed(conn)) { > reason = FAILURE_WINDOW_CLOSED; > Reviewed-by: Lee Duncan <lduncan@xxxxxxxx>