The following commit has been merged into the ras/core branch of tip: Commit-ID: 612c2addff367ee461dc99ffca2bc786f105d2ec Gitweb: https://git.kernel.org/tip/612c2addff367ee461dc99ffca2bc786f105d2ec Author: Yazen Ghannam <yazen.ghannam@xxxxxxx> AuthorDate: Tue, 22 Oct 2024 19:36:31 Committer: Borislav Petkov (AMD) <bp@xxxxxxxxx> CommitterDate: Thu, 31 Oct 2024 10:53:04 +01:00 EDAC/mce_amd: Add support for FRU text in MCA A new "FRU Text in MCA" feature is defined where the Field Replaceable Unit (FRU) Text for a device is represented by a string in the new MCA_SYND1 and MCA_SYND2 registers. This feature is supported per MCA bank, and it is advertised by the McaFruTextInMca bit (MCA_CONFIG[9]). The FRU Text is populated dynamically for each individual error state (MCA_STATUS, MCA_ADDR, et al.). Handle the case where an MCA bank covers multiple devices, for example, a Unified Memory Controller (UMC) bank that manages two DIMMs. [ Yazen: Add Avadhut as co-developer for wrapper changes. ] [ bp: Do not expose MCA_CONFIG to userspace yet. ] Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx> Co-developed-by: Avadhut Naik <avadhut.naik@xxxxxxx> Signed-off-by: Avadhut Naik <avadhut.naik@xxxxxxx> Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx> Link: https://lore.kernel.org/r/20241022194158.110073-6-avadhut.naik@xxxxxxx --- arch/x86/include/asm/mce.h | 1 + drivers/edac/mce_amd.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4d936ee..4543cf2 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -61,6 +61,7 @@ * - TCC bit is present in MCx_STATUS. */ #define MCI_CONFIG_MCAX 0x1 +#define MCI_CONFIG_FRUTEXT BIT_ULL(9) #define MCI_IPID_MCATYPE 0xFFFF0000 #define MCI_IPID_HWID 0xFFF diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 194d9fd..50d74d3 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -795,6 +795,7 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) struct mce *m = (struct mce *)data; struct mce_hw_err *err = to_mce_hw_err(m); unsigned int fam = x86_family(m->cpuid); + u32 mca_config_lo = 0, dummy; int ecc; if (m->kflags & MCE_HANDLED_CEC) @@ -814,11 +815,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) ((m->status & MCI_STATUS_PCC) ? "PCC" : "-")); if (boot_cpu_has(X86_FEATURE_SMCA)) { - u32 low, high; - u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); + rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(m->bank), &mca_config_lo, &dummy); - if (!rdmsr_safe(addr, &low, &high) && - (low & MCI_CONFIG_MCAX)) + if (mca_config_lo & MCI_CONFIG_MCAX) pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-")); pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-")); @@ -853,8 +852,15 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (m->status & MCI_STATUS_SYNDV) { pr_cont(", Syndrome: 0x%016llx\n", m->synd); - pr_emerg(HW_ERR "Syndrome1: 0x%016llx, Syndrome2: 0x%016llx", - err->vendor.amd.synd1, err->vendor.amd.synd2); + if (mca_config_lo & MCI_CONFIG_FRUTEXT) { + char frutext[17]; + + frutext[16] = '\0'; + memcpy(&frutext[0], &err->vendor.amd.synd1, 8); + memcpy(&frutext[8], &err->vendor.amd.synd2, 8); + + pr_emerg(HW_ERR "FRU Text: %s", frutext); + } } pr_cont("\n");