On Thu, 7 Mar 2024, Kane Chen wrote: > The current code only prints PKGC-10 residency when the PKGC-10 > is not reached in previous 'freeze' attempt. To debug PKGC-10 issues, we > also need to know other PKGC residency counters to better triage issues. > Ex: > 1. When system is stuck in PC2, it can be caused short LTR from device. > 2. When system is stuck in PC8, it can be caused by display engine. > > To better triage issues, all PKGC residency are needed when issues happens. happen. > Example log: > CPU did not enter Package C10!!! (Package C10 cnt=0x0) > Prev Package C2 cnt = 0x2191a325de, Current Package C2 cnt = 0x21aba30724 > Prev Package C3 cnt = 0x0, Current Package C3 cnt = 0x0 > Prev Package C6 cnt = 0x0, Current Package C6 cnt = 0x0 > Prev Package C7 cnt = 0x0, Current Package C7 cnt = 0x0 > Prev Package C8 cnt = 0x0, Current Package C8 cnt = 0x0 > Prev Package C9 cnt = 0x0, Current Package C9 cnt = 0x0 > Prev Package C10 cnt = 0x0, Current Package C10 cnt = 0x0 > > With this log, we can know whether it's a stuck PC2 issue, and we can > check whether the short LTR from device causes the issue. > > Signed-off-by: Kane Chen <kane.chen@xxxxxxxxx> > --- > drivers/platform/x86/intel/pmc/core.c | 47 ++++++++++++++++++++------- > drivers/platform/x86/intel/pmc/core.h | 7 ++-- > 2 files changed, 41 insertions(+), 13 deletions(-) > > diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c > index 8f9c036809c79..b8910b671667e 100644 > --- a/drivers/platform/x86/intel/pmc/core.c > +++ b/drivers/platform/x86/intel/pmc/core.c > @@ -1389,6 +1389,15 @@ static int pmc_core_probe(struct platform_device *pdev) > return -ENOMEM; > pmcdev->pmcs[PMC_IDX_MAIN] = primary_pmc; > > + /* The last element in msr_map is empty */ > + pmcdev->num_of_pkgc = ARRAY_SIZE(msr_map) - 1; > + pmcdev->pkgc_res_cnt = devm_kcalloc(&pdev->dev, > + pmcdev->num_of_pkgc, > + sizeof(*pmcdev->pkgc_res_cnt), > + GFP_KERNEL); > + if (!pmcdev->pkgc_res_cnt) > + return -ENOMEM; > + > /* > * Coffee Lake has CPU ID of Kaby Lake and Cannon Lake PCH. So here > * Sunrisepoint PCH regmap can't be used. Use Cannon Lake PCH regmap > @@ -1432,6 +1441,7 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) > { > struct pmc_dev *pmcdev = dev_get_drvdata(dev); > struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; > + int i; unsigned int. > if (pmcdev->suspend) > pmcdev->suspend(pmcdev); > @@ -1440,9 +1450,11 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) > if (pm_suspend_via_firmware()) > return 0; > > - /* Save PC10 residency for checking later */ > - if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter)) > - return -EIO; > + /* Save PKGC residency for checking later */ > + for (i = 0; i < pmcdev->num_of_pkgc; i++) { > + if (rdmsrl_safe(msr_map[i].bit_mask, &pmcdev->pkgc_res_cnt[i])) > + return -EIO; > + } > > /* Save S0ix residency for checking later */ > if (pmc_core_dev_state_get(pmc, &pmcdev->s0ix_counter)) > @@ -1451,14 +1463,15 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) > return 0; > } > > -static inline bool pmc_core_is_pc10_failed(struct pmc_dev *pmcdev) > +static inline bool pmc_core_is_deepest_pkgc_failed(struct pmc_dev *pmcdev) > { > - u64 pc10_counter; > + u32 deepest_pkgc_msr = msr_map[pmcdev->num_of_pkgc - 1].bit_mask; > + u64 deepest_pkgc_residency; > > - if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter)) > + if (rdmsrl_safe(deepest_pkgc_msr, &deepest_pkgc_residency)) > return false; > > - if (pc10_counter == pmcdev->pc10_counter) > + if (deepest_pkgc_residency == pmcdev->pkgc_res_cnt[pmcdev->num_of_pkgc - 1]) > return true; > > return false; > @@ -1497,10 +1510,22 @@ int pmc_core_resume_common(struct pmc_dev *pmcdev) > if (!warn_on_s0ix_failures) > return 0; > > - if (pmc_core_is_pc10_failed(pmcdev)) { > - /* S0ix failed because of PC10 entry failure */ > - dev_info(dev, "CPU did not enter PC10!!! (PC10 cnt=0x%llx)\n", > - pmcdev->pc10_counter); > + if (pmc_core_is_deepest_pkgc_failed(pmcdev)) { > + /* S0ix failed because of deepest PKGC entry failure */ > + dev_info(dev, "CPU did not enter %s!!! (%s cnt=0x%llx)\n", > + msr_map[pmcdev->num_of_pkgc - 1].name, > + msr_map[pmcdev->num_of_pkgc - 1].name, > + pmcdev->pkgc_res_cnt[pmcdev->num_of_pkgc - 1]); > + > + for (i = 0; i < pmcdev->num_of_pkgc; i++) { > + u64 pc_cnt; > + > + if (!rdmsrl_safe(msr_map[i].bit_mask, &pc_cnt)) { > + dev_info(dev, "Prev %s cnt = 0x%llx, Current %s cnt = 0x%llx\n", > + msr_map[i].name, pmcdev->pkgc_res_cnt[i], > + msr_map[i].name, pc_cnt); > + } > + } > return 0; > } > > diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h > index 54137faaae2b2..fd7ae76984ac7 100644 > --- a/drivers/platform/x86/intel/pmc/core.h > +++ b/drivers/platform/x86/intel/pmc/core.h > @@ -385,7 +385,8 @@ struct pmc { > * @pmc_xram_read_bit: flag to indicate whether PMC XRAM shadow registers > * used to read MPHY PG and PLL status are available > * @mutex_lock: mutex to complete one transcation > - * @pc10_counter: PC10 residency counter > + * @pkgc_res_cnt: PKGC residency counter Array of PKGC residency counters With those addressed, feel free to add: Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx> -- i.