On Fri, 2022-07-29 at 14:41 -0700, Brian Bunker wrote: > The error path for the SCSI check condition of not ready, target in > ALUA state transition, will result in the failure of that path after > the retries are exhausted. In most cases that is well ahead of the > transition timeout established in the SCSI ALUA device handler. > > Instead, reprep the command and re-add it to the queue after a 1 > second > delay. This will allow the handler to take care of the timeout and > only fail the path if the target has exceeded the transition expiry > timeout (default 60 seconds). If the expiry timeout is exceeded, the > handler will change the path state from transitioning to standby > leading to a path failure eliminating the potential of this re-prep > to continue endlessly. In most cases the target will exit the > transitioning state well before the expiry timeout but after the > retries are exhausted as mentioned. > > Additionally remove the scsi_io_completion_reprep function which > provides little value. > > Acked-by: Krishna Kant <krishna.kant@xxxxxxxxxxxxxxx> > Acked-by: Seamus Connor <sconnor@xxxxxxxxxxxxxxx> > Signed-off-by: Brian Bunker <brian@xxxxxxxxxxxxxxx> Reviewed-by: Martin Wilck <mwilck@xxxxxxxx> > --- > drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++++++---------------- > -- > 1 file changed, 25 insertions(+), 19 deletions(-) > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index 6ffc9e4258a8..1afb267ff9a2 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -118,7 +118,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int > reason) > } > } > > -static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) > +static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long > msecs) > { > struct request *rq = scsi_cmd_to_rq(cmd); > > @@ -128,7 +128,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd > *cmd) > } else { > WARN_ON_ONCE(true); > } > - blk_mq_requeue_request(rq, true); > + > + if (msecs) { > + blk_mq_requeue_request(rq, false); > + blk_mq_delay_kick_requeue_list(rq->q, msecs); > + } else > + blk_mq_requeue_request(rq, true); > } > > /** > @@ -658,14 +663,6 @@ static unsigned int scsi_rq_err_bytes(const > struct request *rq) > return bytes; > } > > -/* Helper for scsi_io_completion() when "reprep" action required. */ > -static void scsi_io_completion_reprep(struct scsi_cmnd *cmd, > - struct request_queue *q) > -{ > - /* A new command will be prepared and issued. */ > - scsi_mq_requeue_cmd(cmd); > -} > - > static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) > { > struct request *req = scsi_cmd_to_rq(cmd); > @@ -683,14 +680,21 @@ static bool scsi_cmd_runtime_exceeced(struct > scsi_cmnd *cmd) > return false; > } > > +/* > + * When ALUA transition state is returned, reprep the cmd to > + * use the ALUA handlers transition timeout. Delay the reprep > + * 1 sec to avoid aggressive retries of the target in that > + * state. > + */ > +#define ALUA_TRANSITION_REPREP_DELAY 1000 > + > /* Helper for scsi_io_completion() when special action required. */ > static void scsi_io_completion_action(struct scsi_cmnd *cmd, int > result) > { > - struct request_queue *q = cmd->device->request_queue; > struct request *req = scsi_cmd_to_rq(cmd); > int level = 0; > - enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY, > - ACTION_DELAYED_RETRY} action; > + enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP, > + ACTION_RETRY, ACTION_DELAYED_RETRY} action; > struct scsi_sense_hdr sshdr; > bool sense_valid; > bool sense_current = true; /* false implies "deferred > sense" */ > @@ -779,8 +783,8 @@ static void scsi_io_completion_action(struct > scsi_cmnd *cmd, int result) > action = > ACTION_DELAYED_RETRY; > break; > case 0x0a: /* ALUA state transition > */ > - blk_stat = BLK_STS_TRANSPORT; > - fallthrough; > + action = > ACTION_DELAYED_REPREP; > + break; > default: > action = ACTION_FAIL; > break; > @@ -839,7 +843,10 @@ static void scsi_io_completion_action(struct > scsi_cmnd *cmd, int result) > return; > fallthrough; > case ACTION_REPREP: > - scsi_io_completion_reprep(cmd, q); > + scsi_mq_requeue_cmd(cmd, 0); > + break; > + case ACTION_DELAYED_REPREP: > + scsi_mq_requeue_cmd(cmd, > ALUA_TRANSITION_REPREP_DELAY); > break; > case ACTION_RETRY: > /* Retry the same command immediately */ > @@ -933,7 +940,7 @@ static int scsi_io_completion_nz_result(struct > scsi_cmnd *cmd, int result, > * command block will be released and the queue function will be > goosed. If we > * are not done then we have to figure out what to do next: > * > - * a) We can call scsi_io_completion_reprep(). The request will > be > + * a) We can call scsi_mq_requeue_cmd(). The request will be > * unprepared and put back on the queue. Then a new command > will > * be created for it. This should be used if we made forward > * progress, or if we want to switch from READ(10) to READ(6) > for > @@ -949,7 +956,6 @@ static int scsi_io_completion_nz_result(struct > scsi_cmnd *cmd, int result, > void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int > good_bytes) > { > int result = cmd->result; > - struct request_queue *q = cmd->device->request_queue; > struct request *req = scsi_cmd_to_rq(cmd); > blk_status_t blk_stat = BLK_STS_OK; > > @@ -986,7 +992,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, > unsigned int good_bytes) > * request just queue the command up again. > */ > if (likely(result == 0)) > - scsi_io_completion_reprep(cmd, q); > + scsi_mq_requeue_cmd(cmd, 0); > else > scsi_io_completion_action(cmd, result); > }