Re: [PATCH 03/10] scsi: Use scsi_target as argument for eh_target_reset_handler()

Benjamin Block <bblock@xxxxxxxxxxxxx> · Wed, 25 Oct 2023 17:11:11 +0200

On Mon, Oct 23, 2023 at 11:28:30AM +0200, Hannes Reinecke wrote:
> diff --git a/Documentation/scsi/scsi_eh.rst b/Documentation/scsi/scsi_eh.rst
> index 553aff3062d5..cfaf841317ed 100644
> --- a/Documentation/scsi/scsi_eh.rst
> +++ b/Documentation/scsi/scsi_eh.rst
> @@ -411,6 +412,14 @@ scmd->allowed.
>  	    resetting clears all scmds on the sdev, there is no need
>  	    to choose error-completed scmds.
>  
> +	2. If !list_empty(&eh_work_q), invoke scsi_eh_target_reset().
> +
> +	``scsi_eh_target_reset``
> +
> +	    hostt->eh_target_reset_handler() is invoked for each target.

It's only invoked for targets that have commands on the work-q, not each. We
could have 16 rports open, but only one of them has a command that timed out,
and is in EH.

At least that is what I thought it does.

> +	    If target reset succeeds, all failed scmds on all ready or
> +	    offline sdevs on the target are EH-finished.
> +
>  	3. If !list_empty(&eh_work_q), invoke scsi_eh_bus_reset()
>  
>  	``scsi_eh_bus_reset``
> diff --git a/Documentation/scsi/scsi_mid_low_api.rst b/Documentation/scsi/scsi_mid_low_api.rst
> index 43083efc554b..8a075da5641b 100644
> --- a/Documentation/scsi/scsi_mid_low_api.rst
> +++ b/Documentation/scsi/scsi_mid_low_api.rst
> @@ -758,6 +758,24 @@ Details::
>  	int eh_bus_reset_handler(struct Scsi_Host * host, unsigned int channel)
>  
>  
> +    /**
> +    *      eh_target_reset_handler - issue SCSI target reset
> +    *      @starget: identifies SCSI target to be reset
> +    *
> +    *      Returns SUCCESS if command aborted else FAILED

Same as in Patch 1. Or in a later patch, if you prefer.

> +    *
> +    *      Locks: None held
> +    *
> +    *      Calling context: kernel thread
> +    *
> +    *      Notes: Invoked from scsi_eh thread. No other commands will be
> +    *      queued on current host during eh.
> +    *
> +    *      Optionally defined in: LLD
> +    **/
> +	int eh_target_reset_handler(struct scsi_target * starget)
> +
> +
>      /**
>      *      eh_device_reset_handler - issue SCSI device reset
>      *      @scp: identifies SCSI device to be reset
> diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
> index 300f8e955a53..a9d274cabb37 100644
> --- a/drivers/message/fusion/mptsas.c
> +++ b/drivers/message/fusion/mptsas.c
> @@ -2012,7 +2020,7 @@ static const struct scsi_host_template mptsas_driver_template = {
>  	.change_queue_depth 		= mptscsih_change_queue_depth,
>  	.eh_timed_out			= mptsas_eh_timed_out,
>  	.eh_abort_handler		= mptscsih_abort,
> -	.eh_device_reset_handler	= mptscsih_dev_reset,
> +	.eh_target_reset_handler	= mptsas_eh_target_reset,

Huh, it that correct? Replace the device reset handler with the target
handler? That seems odd.

>  	.eh_host_reset_handler		= mptscsih_host_reset,
>  	.bios_param			= mptscsih_bios_param,
>  	.can_queue			= MPT_SAS_CAN_QUEUE,
> diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
> index 6c5920db1e9d..d379dea7074c 100644
> --- a/drivers/message/fusion/mptspi.c
> +++ b/drivers/message/fusion/mptspi.c
> @@ -834,7 +840,7 @@ static const struct scsi_host_template mptspi_driver_template = {
>  	.slave_destroy			= mptspi_slave_destroy,
>  	.change_queue_depth 		= mptscsih_change_queue_depth,
>  	.eh_abort_handler		= mptscsih_abort,
> -	.eh_device_reset_handler	= mptscsih_dev_reset,
> +	.eh_target_reset_handler	= mptspi_eh_target_reset,

...

>  	.eh_bus_reset_handler		= mptscsih_bus_reset,
>  	.eh_host_reset_handler		= mptscsih_host_reset,
>  	.bios_param			= mptscsih_bios_param,
> diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
> index 76c136d39bf1..5c9a7c9f9a98 100644
> --- a/drivers/s390/scsi/zfcp_scsi.c
> +++ b/drivers/s390/scsi/zfcp_scsi.c
> @@ -340,9 +340,12 @@ static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt)
>  	return zfcp_scsi_task_mgmt_function(sdev, FCP_TMF_LUN_RESET);
>  }
>  
> -static int zfcp_scsi_eh_target_reset_handler(struct scsi_cmnd *scpnt)
> +/*
> + * Note: We need to select a LUN as the storage array doesn't
> + * necessarily supports LUN 0 and might refuse the target reset.

That might be true, I don't really know, but it's not the reason why we added
this "search a sdev" logic to the function. The firmware for FCP channels
needs a valid "LUN handle" for each FCP request we send, otherwise it's
rejected. So we have to find a valid handle. This is an artifact from
HBA virtualisation before NPIV on Z.

There is more details in
674595d8519f ("scsi: zfcp: decouple our scsi_eh callbacks from scsi_cmnd")

>From my perspective you can just remove the comment, or change it to match
the part about `zfcp_scsi_eh_target_reset_handler()` in what Steffen wrote in
the commit.

> + */
> +static int zfcp_scsi_eh_target_reset_handler(struct scsi_target *starget)
>  {
> -	struct scsi_target *starget = scsi_target(scpnt->device);
>  	struct fc_rport *rport = starget_to_rport(starget);
>  	struct Scsi_Host *shost = rport_to_shost(rport);
>  	struct scsi_device *sdev = NULL, *tmp_sdev;
> diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
> index 98aa17a6448a..244b7a6f8616 100644
> --- a/drivers/scsi/lpfc/lpfc_scsi.c
> +++ b/drivers/scsi/lpfc/lpfc_scsi.c
> @@ -6045,15 +6045,15 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd)
>   *  0x2002 - Success
>   **/
>  static int
> -lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
> +lpfc_target_reset_handler(struct scsi_target *starget)
>  {
> -	struct Scsi_Host  *shost = cmnd->device->host;
> -	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
> +	struct fc_rport *rport = starget_to_rport(starget);
> +	struct Scsi_Host  *shost = rport_to_shost(rport);
>  	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
>  	struct lpfc_rport_data *rdata;
>  	struct lpfc_nodelist *pnode;
> -	unsigned tgt_id = cmnd->device->id;
> -	uint64_t lun_id = cmnd->device->lun;
> +	unsigned tgt_id = starget->id;
> +	uint64_t lun_id = 0;

Well, hopefully storage targets for LPFC can deal with LUN 0 :)

>  	struct lpfc_scsi_event_header scsi_event;
>  	int status;
>  	u32 logit = LOG_FCP;
> diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
> index c60014e07b44..bee950f11576 100644
> --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
> +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
> @@ -4804,21 +4804,25 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd)
>  
>  /*
>   * megasas_reset_target_fusion : target reset function for fusion adapters
> - * scmd: SCSI command pointer
> + * @shost: SCSI host pointer
> + * @starget: SCSI target pointer
>   *
>   * Returns SUCCESS if all commands associated with target aborted else FAILED
>   */
>  
> -int megasas_reset_target_fusion(struct scsi_cmnd *scmd)
> +int megasas_reset_target_fusion(struct Scsi_Host *shost,
> +				struct scsi_target *starget)
>  {
>  
>  	struct megasas_instance *instance;
> +	struct scsi_device *sdev;
>  	int ret = FAILED;
> -	u16 devhandle;
> -	struct MR_PRIV_DEVICE *mr_device_priv_data;
> -	mr_device_priv_data = scmd->device->hostdata;
> +	u16 devhandle = USHRT_MAX;
> +	struct MR_PRIV_DEVICE *mr_device_priv_data = NULL;
>  
> -	instance = (struct megasas_instance *)scmd->device->host->hostdata;
> +	instance = (struct megasas_instance *)shost->hostdata;

`shost_priv()`? And you can move that to the top as well I think.

> +	starget_printk(KERN_INFO, starget,
> +		    "target reset called\n");
>  
>  	if (atomic_read(&instance->adprecovery) != MEGASAS_HBA_OPERATIONAL) {
>  		dev_err(&instance->pdev->dev, "Controller is not OPERATIONAL,"
> diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
> index 381e07f2b718..63d95aa8a5f3 100644
> --- a/drivers/scsi/mpi3mr/mpi3mr_os.c
> +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
> @@ -4090,54 +4090,41 @@ static int mpi3mr_eh_bus_reset(struct Scsi_Host *shost, unsigned int channel)
>   * Return: SUCCESS of successful termination of the scmd else
>   *         FAILED
>   */
> -static int mpi3mr_eh_target_reset(struct scsi_cmnd *scmd)
> +static int mpi3mr_eh_target_reset(struct scsi_target *starget)
>  {
> -	struct mpi3mr_ioc *mrioc = shost_priv(scmd->device->host);
> +	struct Scsi_Host *shost = dev_to_shost(&starget->dev);
> +	struct mpi3mr_ioc *mrioc = shost_priv(shost);
>  	struct mpi3mr_stgt_priv_data *stgt_priv_data;
> -	struct mpi3mr_sdev_priv_data *sdev_priv_data;
>  	u16 dev_handle;
>  	u8 resp_code = 0;
>  	int retval = FAILED, ret = 0;
>  
> -	sdev_printk(KERN_INFO, scmd->device,
> -	    "Attempting Target Reset! scmd(%p)\n", scmd);
> -	scsi_print_command(scmd);
> -
> -	sdev_priv_data = scmd->device->hostdata;
> -	if (!sdev_priv_data || !sdev_priv_data->tgt_priv_data) {
> -		sdev_printk(KERN_INFO, scmd->device,
> -		    "SCSI device is not available\n");
> -		retval = SUCCESS;
> -		goto out;
> -	}
> +	starget_printk(KERN_INFO, starget,
> +	    "Attempting Target Reset!\n");

Nitpick: you can remove the line-break.

>  
> -	stgt_priv_data = sdev_priv_data->tgt_priv_data;
> +	stgt_priv_data = starget->hostdata;
>  	dev_handle = stgt_priv_data->dev_handle;
>  	if (stgt_priv_data->dev_removed) {
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index f82551783feb..fc0510cd367c 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -1662,7 +1664,8 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost,
>  					     " target: %d\n",
>  					     current->comm, id));
>  		list_for_each_entry_safe(scmd, next, &tmp_list, eh_entry) {
> -			if (scmd_id(scmd) != id)
> +			if (scmd_channel(scmd) != channel ||
> +			    scmd_id(scmd) != id)

Hmm, interesting. So this was broken before as well?

Tbh., this seems like a fix that could go to stable? Should we have a separate
patch for it?

>  				continue;
>  
>  			if (rtn == SUCCESS)

-- 
Best Regards, Benjamin Block        /        Linux on IBM Z Kernel Development
IBM Deutschland Research & Development GmbH    /   https://www.ibm.com/privacy
Vors. Aufs.-R.: Gregor Pillen         /         Geschäftsführung: David Faller
Sitz der Ges.: Böblingen     /    Registergericht: AmtsG Stuttgart, HRB 243294