(Resending, because I forgot to cc linux-scsi. BIG SORRY!) Introduce a new blist flag that indicates the device may return certain sense code/ASC/ASCQ combinations that indicate different treatment than normal. In particular, some devices need unconditional retry (aka ADD_TO_MLQUEUE) under certain conditions; otherwise path failures may be falsely detected. Because different devices use different sense codes to indicate this condition, a single bit is not enough. But we also can't use a bit for every possible status code. Therefore the new flag BLIST_ABORTED_CMD_QUIRK just indicates that this is a device for which the return code should be checked. An extra "blacklist" in scsi_aborted_cmd_quirk() then checks for known ASC/ASCQ combinations, and handles them. When such a combination is encountered for the first time, a message is printed. In systems that have several different peripherals using this flag, that might lead to a wrong match without a warning. This small risk is a compromise between exactness and the excessive resources that would be required to check for matching device vendor and model name every time. I chose to recycle devinfo flag (1<<11) (former BLIST_INQUIRY_58, free since 496c91bbc910) for this purpose rather than extending blist_flags_t to 64 bit. This could be easily changed of course. This patch replaces the previously submitted patches "scsi: handle ABORTED_COMMAND on Fujitsu ETERNUS" and "scsi: Always retry internal target error" (Hannes Reinecke) Signed-off-by: Martin Wilck <mwilck@xxxxxxxx> --- drivers/scsi/scsi_devinfo.c | 4 ++- drivers/scsi/scsi_error.c | 82 +++++++++++++++++++++++++++++++++++++++++++++ include/scsi/scsi_devinfo.h | 6 ++++ 3 files changed, 91 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index dfb8da83fa50..39455734d934 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -161,12 +161,14 @@ static struct { {"DGC", "RAID", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, storage on LUN 0 */ {"DGC", "DISK", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, no storage on LUN 0 */ {"EMC", "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, - {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_REPORTLUN2}, + {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN + | BLIST_REPORTLUN2 | BLIST_ABORTED_CMD_QUIRK}, {"EMULEX", "MD21/S2 ESDI", NULL, BLIST_SINGLELUN}, {"easyRAID", "16P", NULL, BLIST_NOREPORTLUN}, {"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN}, {"easyRAID", "F8", NULL, BLIST_NOREPORTLUN}, {"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, + {"FUJITSU", "ETERNUS_DXM", "*", BLIST_ABORTED_CMD_QUIRK}, {"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN | BLIST_INQUIRY_36}, {"Generic", "USB Storage-SMC", "0180", BLIST_FORCELUN | BLIST_INQUIRY_36}, {"Generic", "USB Storage-SMC", "0207", BLIST_FORCELUN | BLIST_INQUIRY_36}, diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 62b56de38ae8..91f5cdc85dfa 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -39,6 +39,7 @@ #include <scsi/scsi_ioctl.h> #include <scsi/scsi_dh.h> #include <scsi/sg.h> +#include <scsi/scsi_devinfo.h> #include "scsi_priv.h" #include "scsi_logging.h" @@ -432,6 +433,84 @@ static void scsi_report_sense(struct scsi_device *sdev, } } +struct aborted_cmd_blist { + u8 asc; + u8 ascq; + int retval; + const char *vendor; + const char *model; + bool warned; +}; + +/** + * scsi_aborted_cmd_quirk - Handle special return codes for ABORTED COMMAND + * @sdev: SCSI device that returned ABORTED COMMAND. + * @sshdr: Sense data + * + * Return value: + * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE + * + * Notes: + * This is only called for devices that have the blist flag + * BLIST_ABORTED_CMD_QUIRK set. + */ +static int scsi_aborted_cmd_quirk(const struct scsi_device *sdev, + const struct scsi_sense_hdr *sshdr) +{ + static struct aborted_cmd_blist blist[] = { + /* + * 44/00: SYMMETRIX uses this code for a variety of internal + * issues, all of which can be recovered by retry + */ + { 0x44, 0x00, ADD_TO_MLQUEUE, "EMC", "SYMMETRIX", false }, + /* + * c1/01: This is used by ETERNUS to indicate the + * command should be retried unconditionally + */ + { 0xc1, 0x01, ADD_TO_MLQUEUE, "FUJITSU", "ETERNUS_DXM", false } + }; + struct aborted_cmd_blist *found = NULL; + int ret = NEEDS_RETRY, i; + + for (i = 0; i < sizeof(blist)/sizeof(struct aborted_cmd_blist); i++) { + if (sshdr->asc == blist[i].asc && + sshdr->ascq == blist[i].ascq) { + found = &blist[i]; + ret = found->retval; + break; + } + } + + if (found == NULL || found->warned) + return ret; + + found->warned = true; + + /* + * When we encounter a known ASC/ASCQ combination, it may or may not + * match the device for which this combination is known. + * Warn only once for each known ASC/ASCQ combination. + * We can't afford making a string comparison every time in the + * SCSI command return path, and a wrong match here is expected to be + * non-fatal. + */ + if (!strcmp(sdev->vendor, found->vendor) && + !strcmp(sdev->model, found->model)) { + SCSI_LOG_ERROR_RECOVERY(3, + sdev_printk(KERN_INFO, sdev, + "special retcode %d for ABORTED COMMAND %02x/%02x on %s:%s (expected)", + ret, sshdr->asc, sshdr->ascq, + sdev->vendor, sdev->model)); + } else { + SCSI_LOG_ERROR_RECOVERY(1, + sdev_printk(KERN_WARNING, sdev, + "special retcode %d for ABORTED COMMAND %02x/%02x on %s:%s (UNEXPECTED, please inform linux-scsi@xxxxxxxxxxxxxxx)", + ret, sshdr->asc, sshdr->ascq, + sdev->vendor, sdev->model)); + } + return ret; +} + /** * scsi_check_sense - Examine scsi cmd sense * @scmd: Cmd to have sense checked. @@ -503,6 +582,9 @@ int scsi_check_sense(struct scsi_cmnd *scmd) if (sshdr.asc == 0x10) /* DIF */ return SUCCESS; + if (sdev->sdev_bflags & BLIST_ABORTED_CMD_QUIRK) + return scsi_aborted_cmd_quirk(sdev, &sshdr); + return NEEDS_RETRY; case NOT_READY: case UNIT_ATTENTION: diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index ea67c32e870e..1f5ed53040ab 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -28,6 +28,12 @@ #define BLIST_LARGELUN ((__force blist_flags_t)(1 << 9)) /* override additional length field */ #define BLIST_INQUIRY_36 ((__force blist_flags_t)(1 << 10)) +/* + * Device uses special return codes for ABORTED COMMAND + * This flag must go together with matching status handling code in + * scsi_aborted_cmd_quirk() + */ +#define BLIST_ABORTED_CMD_QUIRK ((__force blist_flags_t)(1 << 11)) /* do not do automatic start on add */ #define BLIST_NOSTARTONADD ((__force blist_flags_t)(1 << 12)) /* try REPORT_LUNS even for SCSI-2 devs (if HBA supports more than 8 LUNs) */ -- 2.16.1