On 16/11/18 11:50 AM, Adrian Hunter wrote: > On 16/11/18 11:37 AM, Anisse Astier wrote: >> Hi Adrian, >> >> On Thu, Nov 08, 2018 at 05:43:32PM +0000, Hunter, Adrian wrote: >>>> -----Original Message----- >>>> From: Anisse Astier [mailto:anisse@xxxxxxxxx] >>>> Sent: Thursday, November 8, 2018 5:55 PM >>>> To: Hunter, Adrian <adrian.hunter@xxxxxxxxx> >>>> Cc: linux-mmc@xxxxxxxxxxxxxxx; Ulf Hansson <ulf.hansson@xxxxxxxxxx> >>>> Subject: Re: [PATCH] mmc: sdhci-pci: disable intel voltage switch if >>>> unsupported >>>> >>>> On Mon, Nov 05, 2018 at 03:37:05PM +0200, Adrian Hunter wrote: >>>>> On 4/11/18 6:33 PM, Anisse Astier wrote: >>>>>> Hi Adrian, >>>>>> >>>>>> On Thu, Oct 25, 2018 at 12:06:31PM +0200, Anisse Astier wrote: >>>>>>> On Tue, Oct 23, 2018 at 04:47:05PM +0300, Adrian Hunter wrote: >>>>>>>> Here are a couple of patches to get a bit more information. Also, >>>>>>>> if you config sdhci as built-in (y instead of m) then we should >>>>>>>> see a debug message from the interrupt handler. >>>>>>> >>>>>>> Please find the new log here: >>>>>>> https://anisse.astier.eu/static/dmesg-4.19.nocqe-builtin-regdump.tx >>>>>>> t.xz >>>>>>> >>>>>>> CQHCI is still disabled, sdhci is built-in, and it has your >>>>>>> register dump patches applied. Here is the register dump after the >>>>>>> first (and >>>>>>> only) timeout: >>>>>>> >>>>>>> [ 16.312690] mmc0: Timeout waiting for hardware interrupt. >>>>>>> [ 16.312702] mmc0: sdhci: ============ SDHCI REGISTER DUMP >>>> =========== >>>>>>> [ 16.312711] mmc0: sdhci: Sys addr: 0x00000008 | Version: 0x00001002 >>>>>>> [ 16.312718] mmc0: sdhci: Blk size: 0x00007200 | Blk cnt: 0x00000008 >>>>>>> [ 16.312726] mmc0: sdhci: Argument: 0x00cf3f80 | Trn mode: >>>> 0x0000003b >>>>>>> [ 16.312733] mmc0: sdhci: Present: 0x1fff0206 | Host ctl: 0x0000003d >>>>>>> [ 16.312740] mmc0: sdhci: Power: 0x0000000b | Blk gap: 0x00000080 >>>>>>> [ 16.312747] mmc0: sdhci: Wake-up: 0x00000000 | Clock: 0x00000007 >>>>>>> [ 16.312753] mmc0: sdhci: Timeout: 0x00000006 | Int stat: 0x00000000 >>>>>>> [ 16.312760] mmc0: sdhci: Int enab: 0x02ff000b | Sig enab: 0x02ff000b >>>>>>> [ 16.312767] mmc0: sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 >>>>>>> [ 16.312774] mmc0: sdhci: Caps: 0x546ec881 | Caps_1: 0x80000807 >>>>>>> [ 16.312780] mmc0: sdhci: Cmd: 0x0000123a | Max curr: 0x00000000 >>>>>>> [ 16.312786] mmc0: sdhci: Resp[0]: 0x00000800 | Resp[1]: 0x00000000 >>>>>>> [ 16.312793] mmc0: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000900 >>>>>>> [ 16.312799] mmc0: sdhci: Host ctl2: 0x0000000d >>>>>>> [ 16.312807] mmc0: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: >>>> 0x000000016ab6d200 >>>>>>> [ 16.312814] mmc0: sdhci: 0x804: 0x00000800 >>>>>>> [ 16.312821] mmc0: sdhci: 0x808: 0x00000800 >>>>>>> [ 16.312827] mmc0: sdhci: 0x810: 0x0000005a >>>>>>> [ 16.312833] mmc0: sdhci: 0x814: 0x3050eb1e >>>>>>> [ 16.312839] mmc0: sdhci: 0x818: 0x040040c8 >>>>>>> [ 16.312845] mmc0: sdhci: 0x81c: 0x00000008 >>>>>>> [ 16.312851] mmc0: sdhci: 0x820: 0x00000502 >>>>>>> [ 16.312858] mmc0: sdhci: 0x824: 0x00000811 >>>>>>> [ 16.312864] mmc0: sdhci: 0x828: 0x1c2a2927 >>>>>>> [ 16.312870] mmc0: sdhci: 0x82c: 0x000d162f >>>>>>> [ 16.312876] mmc0: sdhci: 0x830: 0x00000a0a >>>>>>> [ 16.312882] mmc0: sdhci: 0x834: 0x0001003b >>>>>>> [ 16.312889] mmc0: sdhci: 0x838: 0x00800001 >>>>>>> [ 16.312895] mmc0: sdhci: 0x83c: 0x00000000 >>>>>>> [ 16.312901] mmc0: sdhci: 0x840: 0x00000000 >>>>>>> [ 16.312904] mmc0: sdhci: >>>> ============================================ >>>>>> >>>>>> >>>>>> Is this of any help to understand what's wrong ? >>>>> >>>>> Sorry, I will look at this soon. >>>>> >>> >>> I found some time today. It seems that the ACPI _PS3 method is failing to save the tuning value. >>> That results in a CRC error, but the driver does not recover very gracefully. >>> >>> I will look at making a patch for the driver recovery, but also add some diagnostics prints to the ACPI DSDT to try to figure out what is going wrong there. >>> >>> >> >> Have you had the time to look at what diagnostics could be added ? > > I have a patch, but I am still testing it. > Please try the attached patch
>From 5c33302367e7852aa0a723e311b1749bea59f055 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@xxxxxxxxx> Date: Fri, 16 Nov 2018 14:02:12 +0200 Subject: [PATCH] mmc: sdhci-pci: Workaround GLK firmware failing to restore the tuning value GLK firmware can indicate that the tuning value will be restored after runtime suspend, but not actually do that. Add a workaround that detects such cases, and lets the driver do re-tuning instead. Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx> --- drivers/mmc/host/sdhci-pci-core.c | 70 ++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 7bfd366d970d..dc073613cbb5 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -462,6 +462,9 @@ struct intel_host { u32 dsm_fns; int drv_strength; bool d3_retune; + bool rpm_retune_ok; + u32 glk_rx_ctrl1; + u32 glk_tun_val; }; static const guid_t intel_dsm_guid = @@ -791,6 +794,69 @@ static int glk_emmc_add_host(struct sdhci_pci_slot *slot) return ret; } +#ifdef CONFIG_PM +/* Workaround firmware failing to restore the tuning value */ +static void glk_rpm_retune_wa(struct sdhci_pci_chip *chip, bool susp) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + struct intel_host *intel_host = sdhci_pci_priv(slot); + struct sdhci_host *host = slot->host; + u32 glk_rx_ctrl1; + u32 glk_tun_val; + u32 dly; + + if (intel_host->rpm_retune_ok || !mmc_can_retune(host->mmc)) + return; + + glk_rx_ctrl1 = sdhci_readl(host, 0x834); + glk_tun_val = sdhci_readl(host, 0x840); + + if (susp) { + intel_host->glk_rx_ctrl1 = glk_rx_ctrl1; + intel_host->glk_tun_val = glk_tun_val; + return; + } + + if (!intel_host->glk_tun_val) + return; + + if (glk_rx_ctrl1 != intel_host->glk_rx_ctrl1) { + intel_host->rpm_retune_ok = true; + return; + } + + dly = ((glk_rx_ctrl1 & 0x3f00) >> 8) + (intel_host->glk_tun_val << 1); + if ((dly & 0x7f) == (glk_rx_ctrl1 & 0x7f)) + return; + + intel_host->rpm_retune_ok = true; + chip->rpm_retune = true; + mmc_retune_needed(host->mmc); + pr_info("%s: Requiring re-tune after rpm resume", mmc_hostname(host->mmc)); +} + +static void glk_rpm_retune_chk(struct sdhci_pci_chip *chip, bool susp) +{ + if (chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC && + !chip->rpm_retune) + glk_rpm_retune_wa(chip, susp); +} + +static int glk_runtime_suspend(struct sdhci_pci_chip *chip) +{ + glk_rpm_retune_chk(chip, true); + + return sdhci_cqhci_runtime_suspend(chip); +} + +static int glk_runtime_resume(struct sdhci_pci_chip *chip) +{ + glk_rpm_retune_chk(chip, false); + + return sdhci_cqhci_runtime_resume(chip); +} +#endif + #ifdef CONFIG_ACPI static int ni_set_max_freq(struct sdhci_pci_slot *slot) { @@ -879,8 +945,8 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = { .resume = sdhci_cqhci_resume, #endif #ifdef CONFIG_PM - .runtime_suspend = sdhci_cqhci_runtime_suspend, - .runtime_resume = sdhci_cqhci_runtime_resume, + .runtime_suspend = glk_runtime_suspend, + .runtime_resume = glk_runtime_resume, #endif .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | -- 2.17.1