There are two types of #AC that can be generated in Intel CPUs:
 1. legacy alignment check #AC
 2. split lock #AC

Legacy alignment check #AC can be injected into the guest if the guest
has enabled alignment check.

When the host enables split lock detection, i.e. split_lock_detect != off,
the guest will receive an unexpected #AC whenever a split lock happens in
the guest, since KVM doesn't virtualize this feature to the guest. Old
guests lack a split-lock #AC handler and may have split lock bugs. To let
such guests survive a split lock, apply a policy similar to the host's
split lock detect configuration:

- host split lock detect is sld_warn: warn that a split lock happened in
  the guest, and disable split lock detect around VM entry;
- host split lock detect is sld_fatal: forward the #AC to userspace
  (usually userspace dumps the #AC exception and kills the guest).

Note, with sld_warn and SMT enabled, a split lock in a guest vCPU also
disables split lock detect on the sibling CPU thread while that vCPU is
running.

Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
---
 arch/x86/include/asm/cpu.h  |  1 +
 arch/x86/kernel/cpu/intel.c |  6 ++++++
 arch/x86/kvm/vmx/vmx.c      | 42 ++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/vmx/vmx.h      |  3 +++
 4 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 167d0539e0ad..b46262afa6c1 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -52,6 +52,7 @@ extern enum split_lock_detect_state get_split_lock_detect_state(void);
 extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
 extern void switch_to_sld(unsigned long tifn);
 extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
+extern void split_lock_detect_set(bool on);
 #else
 static inline enum split_lock_detect_state get_split_lock_detect_state(void)
 {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2f9c48e91caf..889469b54b5a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1124,6 +1124,12 @@ void switch_to_sld(unsigned long tifn)
 	__sld_msr_set(!(tifn & _TIF_SLD));
 }
 
+void split_lock_detect_set(bool on)
+{
+	__sld_msr_set(on);
+}
+EXPORT_SYMBOL_GPL(split_lock_detect_set);
+
 #define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}
 
 /*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cdb4bf50ee14..402a9152c6ee 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4553,6 +4553,12 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static bool guest_cpu_alignment_check_enabled(struct kvm_vcpu *vcpu)
+{
+	return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
+}
+
 static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4618,9 +4624,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 		return handle_rmode_exception(vcpu, ex_no, error_code);
 
 	switch (ex_no) {
-	case AC_VECTOR:
-		kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
-		return 1;
 	case DB_VECTOR:
 		dr6 = vmcs_readl(EXIT_QUALIFICATION);
 		if (!(vcpu->guest_debug &
@@ -4649,6 +4652,29 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
 		break;
+	case AC_VECTOR:
+		/*
+		 * Inject #AC back to the guest only when legacy alignment
+		 * check is enabled.
+		 * Otherwise, it must be a split-lock #AC.
+		 * - If sld_state == sld_warn, let the guest survive by
+		 *   setting the vcpu's disable_split_lock_detect to true so
+		 *   that the MSR_TEST_CTRL.SPLIT_LOCK_DETECT bit is toggled
+		 *   around every following VM entry and exit;
+		 * - If sld_state == sld_fatal, forward the #AC to userspace.
+		 */
+		if (guest_cpu_alignment_check_enabled(vcpu) ||
+		    WARN_ON(get_split_lock_detect_state() == sld_off)) {
+			kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+			return 1;
+		}
+		if (get_split_lock_detect_state() == sld_warn) {
+			pr_warn("kvm: split lock #AC happened in %s [%d]\n",
+				current->comm, current->pid);
+			vmx->disable_split_lock_detect = true;
+			return 1;
+		}
+		/* fall through */
 	default:
 		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
 		kvm_run->ex.exception = ex_no;
@@ -6511,6 +6537,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
 
+	if (static_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+	    !test_tsk_thread_flag(current, TIF_SLD) &&
+	    unlikely(vmx->disable_split_lock_detect))
+		split_lock_detect_set(false);
+
 	/* L1D Flush includes CPU buffer clear to mitigate MDS */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
@@ -6545,6 +6576,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
 
+	if (static_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+	    !test_tsk_thread_flag(current, TIF_SLD) &&
+	    unlikely(vmx->disable_split_lock_detect))
+		split_lock_detect_set(true);
+
 	/* All fields are clean at this point */
 	if (static_branch_unlikely(&enable_evmcs))
 		current_evmcs->hv_clean_fields |=
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7f42cf3dcd70..912eba66c5d5 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -274,6 +274,9 @@ struct vcpu_vmx {
 
 	bool req_immediate_exit;
 
+	/* Disable split-lock detection when running the vCPU */
+	bool disable_split_lock_detect;
+
 	/* Support for PML */
 #define PML_ENTITY_NUM 512
 	struct page *pml_pg;
--
2.23.0
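
For reference, the split-lock #AC handled by the AC_VECTOR path above is
raised by a LOCK-prefixed read-modify-write whose operand crosses a
cache-line boundary. The snippet below is only an illustrative userspace
test, not part of this patch; run inside a guest it exercises the new
handle_exception_nmi() path, and on a host with split_lock_detect=fatal
it dies with SIGBUS.

	/* Illustrative only: trigger a split lock by doing a locked RMW on a
	 * 4-byte value that straddles a 64-byte cache-line boundary.
	 */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		_Alignas(64) static uint8_t buf[128];
		/* Bytes 62..65 span the first cache-line boundary. */
		uint32_t *counter = (uint32_t *)(buf + 62);

		/* Compiles to a LOCK-prefixed xadd; the line-crossing operand
		 * makes it a split lock and raises #AC when split lock
		 * detection is enabled.
		 */
		__atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST);

		printf("split lock not detected, counter = %u\n", *counter);
		return 0;
	}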
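
For the sld_fatal case, the #AC reaches userspace as KVM_EXIT_EXCEPTION
with the vector and error code in the 'ex' member of struct kvm_run. A
minimal sketch (not part of this patch; the surrounding vCPU run loop and
the handle_exit() helper are assumed) of how a VMM might report it and
kill the guest:

	/* Sketch only: report a forwarded exception (e.g. the split-lock #AC,
	 * vector 17) delivered as KVM_EXIT_EXCEPTION and stop the guest.
	 * 'run' is the mmap'ed struct kvm_run of the vCPU.
	 */
	#include <stdio.h>
	#include <stdlib.h>
	#include <linux/kvm.h>

	static void handle_exit(struct kvm_run *run)
	{
		switch (run->exit_reason) {
		case KVM_EXIT_EXCEPTION:
			fprintf(stderr,
				"unhandled exception %u (error code 0x%x), killing guest\n",
				run->ex.exception, run->ex.error_code);
			exit(EXIT_FAILURE);
		default:
			break;
		}
	}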