Introduce a new blist flag that indicates the device may return certain sense code/ASC/ASCQ combinations that indicate different treatment than normal. In particular, some devices need unconditional retry (aka ADD_TO_MLQUEUE) under certain conditions; otherwise path failures may be falsely detected in the "maybe_retry" case. Because different devices use different sense codes to indicate this condition, a single bit is not enough. But we also can't use a bit for every possible status code. Therefore the new flag BLIST_ABORTED_CMD_QUIRK just indicates that this is a device for which the return code should be checked. An extra "blacklist" in scsi_aborted_cmd_quirk() then checks for known ASC/ASCQ combinations, and handles them. When such a combination is encountered for the first time, a message is printed. In systems that have several different peripherals using this flag, that might lead to a wrong match without a warning. This small risk is a compromise between exactness and the excessive resources that would be required to check for matching device vendor and model name every time. Also, if there were different devices (identified by vendor/model) using the same ASC/ASCQ, the code might print stray warnings. But the additional effort to required to handle this 100% correctly is hardly justified with the current minimal "blacklist". I chose to recycle devinfo flag (1<<11) (former BLIST_INQUIRY_58, free since 496c91bbc910) for this purpose rather than extending blist_flags_t to 64 bit. This could be easily changed of course. This patch replaces the previously submitted patches "scsi: handle ABORTED_COMMAND on Fujitsu ETERNUS" and "scsi: Always retry internal target error" (Hannes Reinecke) Changes in v2: - use ARRAY_SIZE (Bart van Assche) - make blist array static const and use separate bitmask for warned flag (Bart van Assche) - Fix string comparison for SCSI vendor and model - Print warning at scsi_logging_level 0, and improve message formatting Signed-off-by: Martin Wilck <mwilck@xxxxxxxx> --- drivers/scsi/scsi_devinfo.c | 4 +- drivers/scsi/scsi_error.c | 111 ++++++++++++++++++++++++++++++++++++++++++++ include/scsi/scsi_devinfo.h | 6 +++ 3 files changed, 120 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index dfb8da83fa50..39455734d934 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -161,12 +161,14 @@ static struct { {"DGC", "RAID", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, storage on LUN 0 */ {"DGC", "DISK", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, no storage on LUN 0 */ {"EMC", "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, - {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_REPORTLUN2}, + {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN + | BLIST_REPORTLUN2 | BLIST_ABORTED_CMD_QUIRK}, {"EMULEX", "MD21/S2 ESDI", NULL, BLIST_SINGLELUN}, {"easyRAID", "16P", NULL, BLIST_NOREPORTLUN}, {"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN}, {"easyRAID", "F8", NULL, BLIST_NOREPORTLUN}, {"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, + {"FUJITSU", "ETERNUS_DXM", "*", BLIST_ABORTED_CMD_QUIRK}, {"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN | BLIST_INQUIRY_36}, {"Generic", "USB Storage-SMC", "0180", BLIST_FORCELUN | BLIST_INQUIRY_36}, {"Generic", "USB Storage-SMC", "0207", BLIST_FORCELUN | BLIST_INQUIRY_36}, diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 62b56de38ae8..d60568f71047 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -26,6 +26,7 @@ #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/jiffies.h> +#include <linux/bitmap.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -39,6 +40,7 @@ #include <scsi/scsi_ioctl.h> #include <scsi/scsi_dh.h> #include <scsi/sg.h> +#include <scsi/scsi_devinfo.h> #include "scsi_priv.h" #include "scsi_logging.h" @@ -432,6 +434,112 @@ static void scsi_report_sense(struct scsi_device *sdev, } } +struct aborted_cmd_blist { + u8 asc; + u8 ascq; + int retval; + const char *vendor; + const char *model; +}; + +/** + * scsi_strcmp - Compare space-padded string with reference string + * @device_str: vendor or model field of struct scsi_device, + * possibly space-padded + * @ref_str: reference string to compare with + * @len: max size of device_str: 8 for vendor, 16 for model + * + * Return value: + * -1, 0, or 1, like strcmp(). + */ +static int scsi_strcmp(const char *device_str, const char *ref_str, int len) +{ + int ref_len = strlen(ref_str); + int r, i; + + WARN_ON(ref_len > len); + r = strncmp(device_str, ref_str, min(ref_len, len)); + if (r != 0) + return r; + + for (i = ref_len; i < strnlen(device_str, len); i++) + if (device_str[i] != ' ') + return 1; + return 0; +} + +/** + * scsi_aborted_cmd_quirk - Handle special return codes for ABORTED COMMAND + * @sdev: SCSI device that returned ABORTED COMMAND. + * @sshdr: Sense data + * + * Return value: + * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE + * + * Notes: + * This is only called for devices that have the blist flag + * BLIST_ABORTED_CMD_QUIRK set. + */ +static int scsi_aborted_cmd_quirk(const struct scsi_device *sdev, + const struct scsi_sense_hdr *sshdr) +{ + static const struct aborted_cmd_blist blist[] = { + /* + * 44/00: SYMMETRIX uses this code for a variety of internal + * issues, all of which can be recovered by retry + */ + { 0x44, 0x00, ADD_TO_MLQUEUE, "EMC", "SYMMETRIX" }, + /* + * c1/01: This is used by ETERNUS to indicate the + * command should be retried unconditionally + */ + { 0xc1, 0x01, ADD_TO_MLQUEUE, "FUJITSU", "ETERNUS_DXM" } + }; + const struct aborted_cmd_blist *found; + int ret = NEEDS_RETRY, i; + static DECLARE_BITMAP(warned, ARRAY_SIZE(blist)); + + for (i = 0; i < ARRAY_SIZE(blist); i++) { + if (sshdr->asc == blist[i].asc && + sshdr->ascq == blist[i].ascq) + break; + } + + if (i >= ARRAY_SIZE(blist)) + return ret; + + found = &blist[i]; + ret = found->retval; + if (test_and_set_bit(BIT(i), warned)) + return ret; + + /* + * When we encounter a known ASC/ASCQ combination, it may or may not + * match the device for which this combination is known. + * Warn only once for each known ASC/ASCQ combination. + * We can't afford making a string comparison every time in the + * SCSI command return path, and a wrong match here is expected to be + * non-fatal. + */ + if (!scsi_strcmp(sdev->vendor, found->vendor, 8) && + !scsi_strcmp(sdev->model, found->model, 16)) { + SCSI_LOG_ERROR_RECOVERY(2, + sdev_printk(KERN_INFO, sdev, + "special retcode %s for ABORTED COMMAND %02x/%02x (expected)", + scsi_mlreturn_string(ret), + sshdr->asc, sshdr->ascq)); + } else { + sdev_printk(KERN_WARNING, sdev, + "special retcode %s for ABORTED COMMAND %02x/%02x\n", + scsi_mlreturn_string(ret), + sshdr->asc, sshdr->ascq); + sdev_printk(KERN_WARNING, sdev, + "(UNEXPECTED from \"%.8s:%.16s\", please inform linux-scsi@xxxxxxxxxxxxxxx)\n", + sdev->vendor, sdev->model); + } + return ret; +} + /** * scsi_check_sense - Examine scsi cmd sense * @scmd: Cmd to have sense checked. @@ -503,6 +611,9 @@ int scsi_check_sense(struct scsi_cmnd *scmd) if (sshdr.asc == 0x10) /* DIF */ return SUCCESS; + if (sdev->sdev_bflags & BLIST_ABORTED_CMD_QUIRK) + return scsi_aborted_cmd_quirk(sdev, &sshdr); + return NEEDS_RETRY; case NOT_READY: case UNIT_ATTENTION: diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index ea67c32e870e..1f5ed53040ab 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -28,6 +28,12 @@ #define BLIST_LARGELUN ((__force blist_flags_t)(1 << 9)) /* override additional length field */ #define BLIST_INQUIRY_36 ((__force blist_flags_t)(1 << 10)) +/* + * Device uses special return codes for ABORTED COMMAND + * This flag must go together with matching status handling code in + * scsi_aborted_cmd_quirk() + */ +#define BLIST_ABORTED_CMD_QUIRK ((__force blist_flags_t)(1 << 11)) /* do not do automatic start on add */ #define BLIST_NOSTARTONADD ((__force blist_flags_t)(1 << 12)) /* try REPORT_LUNS even for SCSI-2 devs (if HBA supports more than 8 LUNs) */ -- 2.16.1