Re: [PATCH v2] scsi: handle special return codes for ABORTED COMMAND

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Gentle reminder - reviews welcome ...

On Tue, 2018-01-30 at 11:25 +0100, Martin Wilck wrote:
> Introduce a new blist flag that indicates the device may return
> certain
> sense code/ASC/ASCQ combinations that indicate different treatment
> than
> normal. In particular, some devices need unconditional retry (aka
> ADD_TO_MLQUEUE) under certain conditions; otherwise path failures may
> be
> falsely detected in the "maybe_retry" case.
> 
> Because different devices use different sense codes to indicate this
> condition, a single bit is not enough. But we also can't use a bit
> for
> every possible status code. Therefore the new flag
> BLIST_ABORTED_CMD_QUIRK
> just indicates that this is a device for which the return code should
> be
> checked. An extra "blacklist" in scsi_aborted_cmd_quirk() then checks
> for
> known ASC/ASCQ combinations, and handles them.
> 
> When such a combination is encountered for the first time, a message
> is
> printed. In systems that have several different peripherals using
> this
> flag, that might lead to a wrong match without a warning. This small
> risk
> is a compromise between exactness and the excessive resources that
> would be
> required to check for matching device vendor and model name every
> time.
> Also, if there were different devices (identified by vendor/model)
> using
> the same ASC/ASCQ, the code might print stray warnings. But the
> additional
> effort to required to handle this 100% correctly is hardly justified
> with
> the current minimal "blacklist".
> 
> I chose to recycle devinfo flag (1<<11) (former BLIST_INQUIRY_58,
> free
> since 496c91bbc910) for this purpose rather than extending
> blist_flags_t to
> 64 bit. This could be easily changed of course.
> 
> This patch replaces the previously submitted patches
>  "scsi: handle ABORTED_COMMAND on Fujitsu ETERNUS" and
>  "scsi: Always retry internal target error" (Hannes Reinecke)
> 
> Changes in v2:
>  - use ARRAY_SIZE (Bart van Assche)
>  - make blist array static const and use separate bitmask for warned
> flag
>    (Bart van Assche)
>  - Fix string comparison for SCSI vendor and model
>  - Print warning at scsi_logging_level 0, and improve message
> formatting
> 
> Signed-off-by: Martin Wilck <mwilck@xxxxxxxx>
> ---
>  drivers/scsi/scsi_devinfo.c |   4 +-
>  drivers/scsi/scsi_error.c   | 111
> ++++++++++++++++++++++++++++++++++++++++++++
>  include/scsi/scsi_devinfo.h |   6 +++
>  3 files changed, 120 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/scsi_devinfo.c
> b/drivers/scsi/scsi_devinfo.c
> index dfb8da83fa50..39455734d934 100644
> --- a/drivers/scsi/scsi_devinfo.c
> +++ b/drivers/scsi/scsi_devinfo.c
> @@ -161,12 +161,14 @@ static struct {
>  	{"DGC", "RAID", NULL, BLIST_SPARSELUN},	/* Dell PV
> 650F, storage on LUN 0 */
>  	{"DGC", "DISK", NULL, BLIST_SPARSELUN},	/* Dell PV
> 650F, no storage on LUN 0 */
>  	{"EMC",  "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
> -	{"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN
> | BLIST_REPORTLUN2},
> +	{"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN
> +	 | BLIST_REPORTLUN2 | BLIST_ABORTED_CMD_QUIRK},
>  	{"EMULEX", "MD21/S2     ESDI", NULL, BLIST_SINGLELUN},
>  	{"easyRAID", "16P", NULL, BLIST_NOREPORTLUN},
>  	{"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN},
>  	{"easyRAID", "F8", NULL, BLIST_NOREPORTLUN},
>  	{"FSC", "CentricStor", "*", BLIST_SPARSELUN |
> BLIST_LARGELUN},
> +	{"FUJITSU", "ETERNUS_DXM", "*", BLIST_ABORTED_CMD_QUIRK},
>  	{"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN |
> BLIST_INQUIRY_36},
>  	{"Generic", "USB Storage-SMC", "0180", BLIST_FORCELUN |
> BLIST_INQUIRY_36},
>  	{"Generic", "USB Storage-SMC", "0207", BLIST_FORCELUN |
> BLIST_INQUIRY_36},
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 62b56de38ae8..d60568f71047 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -26,6 +26,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/delay.h>
>  #include <linux/jiffies.h>
> +#include <linux/bitmap.h>
>  
>  #include <scsi/scsi.h>
>  #include <scsi/scsi_cmnd.h>
> @@ -39,6 +40,7 @@
>  #include <scsi/scsi_ioctl.h>
>  #include <scsi/scsi_dh.h>
>  #include <scsi/sg.h>
> +#include <scsi/scsi_devinfo.h>
>  
>  #include "scsi_priv.h"
>  #include "scsi_logging.h"
> @@ -432,6 +434,112 @@ static void scsi_report_sense(struct
> scsi_device *sdev,
>  	}
>  }
>  
> +struct aborted_cmd_blist {
> +	u8 asc;
> +	u8 ascq;
> +	int retval;
> +	const char *vendor;
> +	const char *model;
> +};
> +
> +/**
> + * scsi_strcmp - Compare space-padded string with reference string
> + * @device_str:	vendor or model field of struct scsi_device,
> + *		possibly space-padded
> + * @ref_str:	reference string to compare with
> + * @len:	max size of device_str: 8 for vendor, 16 for model
> + *
> + * Return value:
> + *	-1, 0, or 1, like strcmp().
> + */
> +static int scsi_strcmp(const char *device_str, const char *ref_str,
> int len)
> +{
> +	int ref_len = strlen(ref_str);
> +	int r, i;
> +
> +	WARN_ON(ref_len > len);
> +	r = strncmp(device_str, ref_str, min(ref_len, len));
> +	if (r != 0)
> +		return r;
> +
> +	for (i = ref_len; i < strnlen(device_str, len); i++)
> +		if (device_str[i] != ' ')
> +			return 1;
> +	return 0;
> +}
> +
> +/**
> + * scsi_aborted_cmd_quirk - Handle special return codes for ABORTED
> COMMAND
> + * @sdev:	SCSI device that returned ABORTED COMMAND.
> + * @sshdr:	Sense data
> + *
> + * Return value:
> + *	SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE
> + *
> + * Notes:
> + *	This is only called for devices that have the blist flag
> + *      BLIST_ABORTED_CMD_QUIRK set.
> + */
> +static int scsi_aborted_cmd_quirk(const struct scsi_device *sdev,
> +				  const struct scsi_sense_hdr
> *sshdr)
> +{
> +	static const struct aborted_cmd_blist blist[] = {
> +		/*
> +		 * 44/00: SYMMETRIX uses this code for a variety of
> internal
> +		 * issues, all of which can be recovered by retry
> +		 */
> +		{ 0x44, 0x00, ADD_TO_MLQUEUE, "EMC", "SYMMETRIX" },
> +		/*
> +		 * c1/01: This is used by ETERNUS to indicate the
> +		 * command should be retried unconditionally
> +		 */
> +		{ 0xc1, 0x01, ADD_TO_MLQUEUE, "FUJITSU",
> "ETERNUS_DXM" }
> +	};
> +	const struct aborted_cmd_blist *found;
> +	int ret = NEEDS_RETRY, i;
> +	static DECLARE_BITMAP(warned, ARRAY_SIZE(blist));
> +
> +	for (i = 0; i < ARRAY_SIZE(blist); i++) {
> +		if (sshdr->asc == blist[i].asc &&
> +		    sshdr->ascq == blist[i].ascq)
> +			break;
> +	}
> +
> +	if (i >= ARRAY_SIZE(blist))
> +		return ret;
> +
> +	found = &blist[i];
> +	ret = found->retval;
> +	if (test_and_set_bit(BIT(i), warned))
> +		return ret;
> +
> +	/*
> +	 * When we encounter a known ASC/ASCQ combination, it may or
> may not
> +	 * match the device for which this combination is known.
> +	 * Warn only once for each known ASC/ASCQ combination.
> +	 * We can't afford making a string comparison every time in
> the
> +	 * SCSI command return path, and a wrong match here is
> expected to be
> +	 * non-fatal.
> +	 */
> +	if (!scsi_strcmp(sdev->vendor, found->vendor, 8) &&
> +	    !scsi_strcmp(sdev->model, found->model, 16)) {
> +		SCSI_LOG_ERROR_RECOVERY(2,
> +			sdev_printk(KERN_INFO, sdev,
> +				    "special retcode %s for ABORTED
> COMMAND %02x/%02x (expected)",
> +				    scsi_mlreturn_string(ret),
> +				    sshdr->asc, sshdr->ascq));
> +	} else {
> +		sdev_printk(KERN_WARNING, sdev,
> +			    "special retcode %s for ABORTED COMMAND
> %02x/%02x\n",
> +			    scsi_mlreturn_string(ret),
> +			    sshdr->asc, sshdr->ascq);
> +		sdev_printk(KERN_WARNING, sdev,
> +			    "(UNEXPECTED from  \"%.8s:%.16s\",
> please inform linux-scsi@xxxxxxxxxxxxxxx)\n",
> +			    sdev->vendor, sdev->model);
> +	}
> +	return ret;
> +}
> +
>  /**
>   * scsi_check_sense - Examine scsi cmd sense
>   * @scmd:	Cmd to have sense checked.
> @@ -503,6 +611,9 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
>  		if (sshdr.asc == 0x10) /* DIF */
>  			return SUCCESS;
>  
> +		if (sdev->sdev_bflags & BLIST_ABORTED_CMD_QUIRK)
> +			return scsi_aborted_cmd_quirk(sdev, &sshdr);
> +
>  		return NEEDS_RETRY;
>  	case NOT_READY:
>  	case UNIT_ATTENTION:
> diff --git a/include/scsi/scsi_devinfo.h
> b/include/scsi/scsi_devinfo.h
> index ea67c32e870e..1f5ed53040ab 100644
> --- a/include/scsi/scsi_devinfo.h
> +++ b/include/scsi/scsi_devinfo.h
> @@ -28,6 +28,12 @@
>  #define BLIST_LARGELUN		((__force blist_flags_t)(1 <<
> 9))
>  /* override additional length field */
>  #define BLIST_INQUIRY_36	((__force blist_flags_t)(1 << 10))
> +/*
> + * Device uses special return codes for ABORTED COMMAND
> + * This flag must go together with matching status handling code in
> + * scsi_aborted_cmd_quirk()
> + */
> +#define BLIST_ABORTED_CMD_QUIRK	((__force blist_flags_t)(1 <<
> 11))
>  /* do not do automatic start on add */
>  #define BLIST_NOSTARTONADD	((__force blist_flags_t)(1 << 12))
>  /* try REPORT_LUNS even for SCSI-2 devs (if HBA supports more than 8
> LUNs) */

-- 
Dr. Martin Wilck <mwilck@xxxxxxxx>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux