Direct HLT instruction execution causes a #VE in TDX VMs, which is routed to the hypervisor via tdvmcall. This process renders HLT instruction execution non-atomic, so any preceding instructions like STI/MOV SS will end up enabling interrupts before the HLT instruction is routed to the hypervisor. This creates scenarios where interrupts could land during HLT instruction emulation without aborting the halt operation, leading to indefinite halt wait times. Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") already upgraded x86_idle() to invoke tdvmcall to avoid such scenarios, but it didn't cover pv_native_safe_halt(), which can be invoked using raw_safe_halt() from call sites like acpi_safe_halt(). raw_safe_halt() also returns with interrupts enabled, so upgrade tdx_safe_halt() to enable interrupts by default and ensure that paravirt safe_halt() executions invoke tdx_safe_halt(). The earlier x86_idle() case is now handled via tdx_idle(), which simply invokes tdvmcall while preserving irq state. To avoid future call sites which cause HLT instruction emulation with irqs enabled, add a warning and fail the HLT instruction emulation. Cc: stable@xxxxxxxxxxxxxxx Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") Signed-off-by: Vishal Annapurve <vannapurve@xxxxxxxxxx> --- Changes since V1: 1) Addressed comments from Dave H - Comment regarding adding a check for TDX VMs in the halt path is not resolved in v2; would like feedback on a better place to do so, maybe in pv_native_safe_halt (?). 2) Added a new version of tdx_safe_halt() that will enable interrupts. 3) Previous tdx_safe_halt() implementation is moved to the newly introduced tdx_idle(). 
V1: https://lore.kernel.org/lkml/Z5l6L3Hen9_Y3SGC@xxxxxxxxxx/T/ arch/x86/coco/tdx/tdx.c | 23 ++++++++++++++++++++++- arch/x86/include/asm/tdx.h | 2 +- arch/x86/kernel/process.c | 2 +- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 0d9b090b4880..cc2a637dca15 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,6 +14,7 @@ #include <asm/ia32.h> #include <asm/insn.h> #include <asm/insn-eval.h> +#include <asm/paravirt_types.h> #include <asm/pgtable.h> #include <asm/set_memory.h> #include <asm/traps.h> @@ -380,13 +381,18 @@ static int handle_halt(struct ve_info *ve) { const bool irq_disabled = irqs_disabled(); + if (!irq_disabled) { + WARN_ONCE(1, "HLT instruction emulation unsafe with irqs enabled\n"); + return -EIO; + } + if (__halt(irq_disabled)) return -EIO; return ve_instr_len(ve); } -void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_idle(void) { const bool irq_disabled = false; @@ -397,6 +403,12 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } +static void __cpuidle tdx_safe_halt(void) +{ + tdx_idle(); + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_module_args args = { @@ -1083,6 +1095,15 @@ void __init tdx_early_init(void) x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; +#ifdef CONFIG_PARAVIRT_XXL + /* + * halt instruction execution is not atomic for TDX VMs as it generates + * #VEs, so otherwise "safe" halt invocations which cause interrupts to + * get enabled right after halt instruction don't work for TDX VMs. + */ + pv_ops.irq.safe_halt = tdx_safe_halt; +#endif + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. 
That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index eba178996d84..dd386500ab1c 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); -void tdx_safe_halt(void); +void tdx_idle(void); bool tdx_early_handle_ve(struct pt_regs *regs); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f63f8fd00a91..4083838fe4a0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -933,7 +933,7 @@ void __init select_idle_routine(void) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_idle); } else { static_call_update(x86_idle, default_idle); } -- 2.48.1.262.g85cc9f2d1e-goog