ATA and SATA drives have had built-in retries for media errors
for as long as they've been commonplace in computers (early 1990s).

When libata stumbles across a bad sector, it can waste minutes
sitting there doing retry after retry before finally giving up
and letting the higher layers deal with it.

This patch removes retries for media errors only.

Signed-off-by: Mark Lord <mlord@xxxxxxxxx>
---
version 3: try to improve readability.

--- old/drivers/ata/libata-eh.c	2012-04-27 13:17:35.000000000 -0400
+++ linux/drivers/ata/libata-eh.c	2012-05-02 15:20:19.946827031 -0400
@@ -2046,6 +2046,26 @@
 }
 
 /**
+ * ata_eh_worth_retry - analyze error and decide whether to retry
+ * @qc: qc to possibly retry
+ *
+ * Look at the cause of the error and decide if a retry
+ * might be useful or not.  We don't want to retry media errors
+ * because the drive itself has probably already taken 10-30 seconds
+ * doing its own internal retries before reporting the failure.
+ */
+static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
+{
+	if (qc->err_mask & AC_ERR_MEDIA)
+		return 0;	/* don't retry media errors */
+	if (qc->flags & ATA_QCFLAG_IO)
+		return 1;	/* otherwise retry anything from fs stack */
+	if (qc->err_mask & AC_ERR_INVALID)
+		return 0;	/* don't retry these */
+	return qc->err_mask != AC_ERR_DEV;  /* retry if not dev error */
+}
+
+/**
  *	ata_eh_link_autopsy - analyze error and determine recovery action
  *	@link: host link to perform autopsy on
  *
@@ -2119,9 +2139,7 @@
 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
 
 		/* determine whether the command is worth retrying */
-		if (qc->flags & ATA_QCFLAG_IO ||
-		    (!(qc->err_mask & AC_ERR_INVALID) &&
-		     qc->err_mask != AC_ERR_DEV))
+		if (ata_eh_worth_retry(qc))
 			qc->flags |= ATA_QCFLAG_RETRY;
 
 		/* accumulate error info */
--- old/drivers/ata/libata-eh.c	2012-04-27 13:17:35.000000000 -0400
+++ linux/drivers/ata/libata-eh.c	2012-05-02 15:20:19.946827031 -0400
@@ -2046,6 +2046,26 @@
 }
 
 /**
+ * ata_eh_worth_retry - analyze error and decide whether to retry
+ * @qc: qc to possibly retry
+ *
+ * Look at the cause of the error and decide if a retry
+ * might be useful or not.  We don't want to retry media errors
+ * because the drive itself has probably already taken 10-30 seconds
+ * doing its own internal retries before reporting the failure.
+ */
+static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
+{
+	if (qc->err_mask & AC_ERR_MEDIA)
+		return 0;	/* don't retry media errors */
+	if (qc->flags & ATA_QCFLAG_IO)
+		return 1;	/* otherwise retry anything from fs stack */
+	if (qc->err_mask & AC_ERR_INVALID)
+		return 0;	/* don't retry these */
+	return qc->err_mask != AC_ERR_DEV;  /* retry if not dev error */
+}
+
+/**
  *	ata_eh_link_autopsy - analyze error and determine recovery action
  *	@link: host link to perform autopsy on
  *
@@ -2119,9 +2139,7 @@
 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
 
 		/* determine whether the command is worth retrying */
-		if (qc->flags & ATA_QCFLAG_IO ||
-		    (!(qc->err_mask & AC_ERR_INVALID) &&
-		     qc->err_mask != AC_ERR_DEV))
+		if (ata_eh_worth_retry(qc))
 			qc->flags |= ATA_QCFLAG_RETRY;
 
 		/* accumulate error info */