On Tue, 2023-12-05 at 15:25 +0100, Borislav Petkov wrote: > On Fri, Nov 10, 2023 at 12:55:59AM +1300, Kai Huang wrote: > > +static const char *mce_memory_info(struct mce *m) > > +{ > > + if (!m || !mce_is_memory_error(m) || !mce_usable_address(m)) > > + return NULL; > > + > > + /* > > + * Certain initial generations of TDX-capable CPUs have an > > + * erratum. A kernel non-temporal partial write to TDX private > > + * memory poisons that memory, and a subsequent read of that > > + * memory triggers #MC. > > + * > > + * However such #MC caused by software cannot be distinguished > > + * from the real hardware #MC. Just print additional message > > + * to show such #MC may be result of the CPU erratum. > > + */ > > + if (!boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) > > + return NULL; > > + > > + return !tdx_is_private_mem(m->addr) ? NULL : > > + "TDX private memory error. Possible kernel bug."; > > +} > > + > > static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) > > { > > struct llist_node *pending; > > struct mce_evt_llist *l; > > int apei_err = 0; > > + const char *memmsg; > > > > /* > > * Allow instrumentation around external facilities usage. Not that it > > @@ -283,6 +307,15 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) > > } > > if (exp) > > pr_emerg(HW_ERR "Machine check: %s\n", exp); > > + /* > > + * Confidential computing platforms such as TDX platforms > > + * may occur MCE due to incorrect access to confidential > > + * memory. Print additional information for such error. > > + */ > > + memmsg = mce_memory_info(final); > > + if (memmsg) > > + pr_emerg(HW_ERR "Machine check: %s\n", memmsg); > > + > > No, this is not how this is done. First of all, this function should be > called something like > > mce_dump_aux_info() > > or so to state that it is dumping some auxiliary info. > > Then, it does: > > if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) > return tdx_get_mce_info(); > > or so and you put that tdx_get_mce_info() function in TDX code and there > you do all your picking apart of things, what needs to be dumped or what > not, checking whether it is a memory error and so on. > > Thx. > Thanks Boris. Looks good to me, with one exception that it is actually TDX host, but not TDX guest. So that the above X86_FEATURE_TDX_GUEST cpu feature check needs to be replaced with the host one. Full incremental diff below. Could you take a look whether this is what you want? diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index a621721f63dd..0c02b66dcc41 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -115,13 +115,13 @@ bool platform_tdx_enabled(void); int tdx_cpu_enable(void); int tdx_enable(void); void tdx_reset_memory(void); -bool tdx_is_private_mem(unsigned long phys); +const char *tdx_get_mce_info(unsigned long phys); #else static inline bool platform_tdx_enabled(void) { return false; } static inline int tdx_cpu_enable(void) { return -ENODEV; } static inline int tdx_enable(void) { return -ENODEV; } static inline void tdx_reset_memory(void) { } -static inline bool tdx_is_private_mem(unsigned long phys) { return false; } +static inline const char *tdx_get_mce_info(unsigned long phys) { return NULL; } #endif /* CONFIG_INTEL_TDX_HOST */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e33537cfc507..b7e650b5f7ef 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -229,26 +229,20 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } -static const char *mce_memory_info(struct mce *m) +static const char *mce_dump_aux_info(struct mce *m) { - if (!m || !mce_is_memory_error(m) || !mce_usable_address(m)) - return NULL; - /* - * Certain initial generations of TDX-capable CPUs have an - * erratum. A kernel non-temporal partial write to TDX private - * memory poisons that memory, and a subsequent read of that - * memory triggers #MC. - * - * However such #MC caused by software cannot be distinguished - * from the real hardware #MC. Just print additional message - * to show such #MC may be result of the CPU erratum. + * Confidential computing platforms such as TDX platforms + * may occur MCE due to incorrect access to confidential + * memory. Print additional information for such error. */ - if (!boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) + if (!m || !mce_is_memory_error(m) || !mce_usable_address(m)) return NULL; - return !tdx_is_private_mem(m->addr) ? NULL : - "TDX private memory error. Possible kernel bug."; + if (platform_tdx_enabled()) + return tdx_get_mce_info(m->addr); + + return NULL; } static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) @@ -256,7 +250,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) struct llist_node *pending; struct mce_evt_llist *l; int apei_err = 0; - const char *memmsg; + const char *auxinfo; /* * Allow instrumentation around external facilities usage. Not that it @@ -307,14 +301,10 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) } if (exp) pr_emerg(HW_ERR "Machine check: %s\n", exp); - /* - * Confidential computing platforms such as TDX platforms - * may occur MCE due to incorrect access to confidential - * memory. Print additional information for such error. - */ - memmsg = mce_memory_info(final); - if (memmsg) - pr_emerg(HW_ERR "Machine check: %s\n", memmsg); + + auxinfo = mce_dump_aux_info(final); + if (auxinfo) + pr_emerg(HW_ERR "Machine check: %s\n", auxinfo); if (!fake_panic) { if (panic_timeout == 0) diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 1b84dcdf63cb..cfbaec0f43b2 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1329,7 +1329,7 @@ static bool is_pamt_page(unsigned long phys) * because it cannot distinguish #MC between software bug and real * hardware error anyway. */ -bool tdx_is_private_mem(unsigned long phys) +static bool tdx_is_private_mem(unsigned long phys) { struct tdx_module_args args = { .rcx = phys & PAGE_MASK, @@ -1391,6 +1391,25 @@ bool tdx_is_private_mem(unsigned long phys) } } +const char *tdx_get_mce_info(unsigned long phys) +{ + /* + * Certain initial generations of TDX-capable CPUs have an + * erratum. A kernel non-temporal partial write to TDX private + * memory poisons that memory, and a subsequent read of that + * memory triggers #MC. + * + * However such #MC caused by software cannot be distinguished + * from the real hardware #MC. Just print additional message + * to show such #MC may be result of the CPU erratum. + */ + if (!boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) + return NULL; + + return !tdx_is_private_mem(phys) ? NULL : + "TDX private memory error. Possible kernel bug."; +} + static int __init record_keyid_partitioning(u32 *tdx_keyid_start, u32 *nr_tdx_keyids) {