The following commit has been merged into the ras/core branch of tip: Commit-ID: 48da1ad8ba95ecd35d76355594c629f3ef2a954a Gitweb: https://git.kernel.org/tip/48da1ad8ba95ecd35d76355594c629f3ef2a954a Author: Yazen Ghannam <yazen.ghannam@xxxxxxx> AuthorDate: Tue, 13 Jun 2023 09:11:41 -05:00 Committer: Borislav Petkov (AMD) <bp@xxxxxxxxx> CommitterDate: Mon, 16 Oct 2023 15:31:32 +02:00 x86/mce: Define amd_mce_usable_address() Currently, all valid MCA_ADDR values are assumed to be usable on AMD systems. However, this is not correct in most cases. Notifiers expecting usable addresses may then operate on inappropriate values. Define a helper function to do AMD-specific checks for a usable memory address. List out all known cases. [ bp: Tone down the capitalized words. ] Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx> Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx> Link: https://lore.kernel.org/r/20230613141142.36801-3-yazen.ghannam@xxxxxxx --- arch/x86/kernel/cpu/mce/amd.c | 38 +++++++++++++++++++++++++++++- arch/x86/kernel/cpu/mce/core.c | 3 ++- arch/x86/kernel/cpu/mce/internal.h | 2 ++- 3 files changed, 43 insertions(+) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index c069934..f3517b8 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -746,6 +746,44 @@ bool amd_mce_is_memory_error(struct mce *m) return legacy_mce_is_memory_error(m); } +/* + * AMD systems do not have an explicit indicator that the value in MCA_ADDR is + * a system physical address. Therefore, individual cases need to be detected. + * Future cases and checks will be added as needed. + * + * 1) General case + * a) Assume address is not usable. + * 2) Poison errors + * a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy + * northbridge (bank 4). + * b) Refers to poison consumption in the core. Does not include "no action", + * "action optional", or "deferred" error severities. + * c) Will include a usable address so that immediate action can be taken. + * 3) Northbridge DRAM ECC errors + * a) Reported in legacy bank 4 with extended error code (XEC) 8. + * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore, + * this bit should not be checked. + * + * NOTE: SMCA UMC memory errors fall into case #1. + */ +bool amd_mce_usable_address(struct mce *m) +{ + /* Check special northbridge case 3) first. */ + if (!mce_flags.smca) { + if (legacy_mce_is_memory_error(m)) + return true; + else if (m->bank == 4) + return false; + } + + /* Check poison bit for all other bank types. */ + if (m->status & MCI_STATUS_POISON) + return true; + + /* Assume address is not usable for all others. */ + return false; +} + static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) { struct mce m; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6f35f72..06c21f5 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -464,6 +464,9 @@ int mce_usable_address(struct mce *m) if (!(m->status & MCI_STATUS_ADDRV)) return 0; + if (m->cpuvendor == X86_VENDOR_AMD) + return amd_mce_usable_address(m); + /* Checks after this one are Intel/Zhaoxin-specific: */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index bcf1b3c..a191554 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -210,6 +210,7 @@ extern bool filter_mce(struct mce *m); #ifdef CONFIG_X86_MCE_AMD extern bool amd_filter_mce(struct mce *m); +bool amd_mce_usable_address(struct mce *m); /* * If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits @@ -237,6 +238,7 @@ static __always_inline void smca_extract_err_addr(struct mce *m) #else static inline bool amd_filter_mce(struct mce *m) { return false; } +static inline bool amd_mce_usable_address(struct mce *m) { return false; } static inline void smca_extract_err_addr(struct mce *m) { } #endif