On 27/09/10 06:32, Ethan Du wrote:
The while loop when handling rw request may become deadloop in case of bad card I've seen mmcqd gets blocked forever after a single error message: mmcblk0: error -110 sending read/write command, response 0x900, card status 0x80e00 Also there was case of card reports status without error mmcblk0: error -110 sending read/write command, response 0x900, card status 0xe00 After this error, the card can stay in prg state, and never comes back, and may not report any error further. So a break out condition should be set in mmc block layer: * should not enter the waiting loop in case of error
How do you know there are not any errors where you do need to wait?
* should break out from the waiting loop, if card response with error * should break out from the waiting loop when timeout These will not help with the card, one more thing to do: * re-init the card in case of too many errors
Using mmc_detect_change() will give you a new block device. That is probably a bad idea for a non-removable card. Also if the reinitialization is going to help, then why wait for lots of errors before doing it.
Signed-off-by: Ethan Du<ethan.too@xxxxxxxxx> --- drivers/mmc/card/block.c | 35 +++++++++++++++++++++++++++-------- drivers/mmc/core/mmc.c | 9 +++++++-- drivers/mmc/core/sd.c | 8 ++++++-- include/linux/mmc/card.h | 3 +++ include/linux/mmc/mmc.h | 1 + 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index d545f79..1d304a2 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -316,12 +316,14 @@ out: return err ? 0 : 1; } +#define BUSY_TIMEOUT_MS (16 * 1024) static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) { struct mmc_blk_data *md = mq->data; struct mmc_card *card = md->queue.card; struct mmc_blk_request brq; int ret = 1, disable_multi = 0; + unsigned long timeout = 0; mmc_claim_host(card->host); @@ -453,7 +455,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) brq.stop.resp[0], status); } - if (!mmc_host_is_spi(card->host)&& rq_data_dir(req) != READ) { + if (!mmc_host_is_spi(card->host)&& rq_data_dir(req) != READ&& + !brq.cmd.error&& !brq.data.error&& !brq.stop.error) { + timeout = jiffies + msecs_to_jiffies(BUSY_TIMEOUT_MS); do { int err; @@ -466,13 +470,22 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) req->rq_disk->disk_name, err); goto cmd_err; } + if (cmd.resp[0]& R1_ERROR_MASK) { + printk(KERN_ERR "%s: card err %#x\n", + req->rq_disk->disk_name, + cmd.resp[0]); + goto cmd_err; + } /* * Some cards mishandle the status bits, * so make sure to check both the busy * indication and the card state. */ - } while (!(cmd.resp[0]& R1_READY_FOR_DATA) || - (R1_CURRENT_STATE(cmd.resp[0]) == 7)); + if ((cmd.resp[0]& R1_READY_FOR_DATA)&& + (R1_CURRENT_STATE(cmd.resp[0]) != 7)) + break; + } while (time_before(jiffies, timeout)); + /* Ignore timeout out */ #if 0 if (cmd.resp[0]& ~0x00000900) @@ -510,11 +523,11 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) return 1; - cmd_err: - /* - * If this is an SD card and we're writing, we can first - * mark the known good sectors as ok. - * +cmd_err: + /* + * If this is an SD card and we're writing, we can first + * mark the known good sectors as ok. + * * If the card is not SD, we can still ok written sectors * as reported by the controller (which might be less than * the real number of written sectors, but never more). @@ -541,6 +554,12 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req)); spin_unlock_irq(&md->lock); + card->err_count++; + if (card->err_count>= ERR_TRIGGER_REINIT) { + printk(KERN_WARNING "%s: re-init the card due to error\n", + req->rq_disk->disk_name); + mmc_detect_change(card->host, 0); + } return 0; } diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 6909a54..79c4759 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -535,6 +535,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (!oldcard) host->card = card; + else + oldcard->err_count = 0; return 0; @@ -563,17 +565,20 @@ static void mmc_remove(struct mmc_host *host) */ static void mmc_detect(struct mmc_host *host) { - int err; + int err = 0; BUG_ON(!host); BUG_ON(!host->card); mmc_claim_host(host); + if (host->card->err_count>= ERR_TRIGGER_REINIT) + err = mmc_init_card(host, host->ocr, host->card); /* * Just check if our card has been removed. */ - err = mmc_send_status(host->card, NULL); + if (!err) + err = mmc_send_status(host->card, NULL); mmc_release_host(host); diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 0f52410..820b0d1 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -635,6 +635,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, mmc_set_bus_width(host, MMC_BUS_WIDTH_4); } + card->err_count = 0; host->card = card; return 0; @@ -662,17 +663,20 @@ static void mmc_sd_remove(struct mmc_host *host) */ static void mmc_sd_detect(struct mmc_host *host) { - int err; + int err = 0; BUG_ON(!host); BUG_ON(!host->card); mmc_claim_host(host); + if (host->card->err_count>= ERR_TRIGGER_REINIT) + err = mmc_sd_init_card(host, host->ocr, host->card); /* * Just check if our card has been removed. */ - err = mmc_send_status(host->card, NULL); + if (!err) + err = mmc_send_status(host->card, NULL); mmc_release_host(host); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 6b75250..178de17 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -143,6 +143,9 @@ struct mmc_card { const char **info; /* info strings */ struct sdio_func_tuple *tuples; /* unknown common tuples */ + unsigned int err_count; +#define ERR_TRIGGER_REINIT 1024 + struct dentry *debugfs_root; }; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index dd11ae5..3b979a1 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -122,6 +122,7 @@ #define R1_UNDERRUN (1<< 18) /* ex, c */ #define R1_OVERRUN (1<< 17) /* ex, c */ #define R1_CID_CSD_OVERWRITE (1<< 16) /* erx, c, CID/CSD overwrite */ +#define R1_ERROR_MASK 0xffff0000 #define R1_WP_ERASE_SKIP (1<< 15) /* sx, c */ #define R1_CARD_ECC_DISABLED (1<< 14) /* sx, a */ #define R1_ERASE_RESET (1<< 13) /* sr, c */
-- To unsubscribe from this list: send the line "unsubscribe linux-mmc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html