Prior to SEV-ES, KVM stored/loaded host debug registers upon switching to/from a VM. Changing those registers inside a running SEV VM triggered #VC exit to KVM. SEV-ES added the encrypted state (ES) which uses an encrypted guest page for the VM state (VMSA). The hardware saves/restores certain registers on VMRUN/VMEXIT according to a swap type (A, B, C), see "Table B-3. Swap Types" in the AMD Architecture Programmer’s Manual volume 2. AMD Milan (Fam 19h) introduces support for the debug registers swapping. DR6 and DR7 are always swapped. DR[0-3] and DR[0-3]_ADDR_MASK are swapped a type B when SEV_FEATURES[5] ("DebugSwap") is set. Enable DebugSwap in VMSA. But only do so if CPUID Fn80000021_EAX[0] ("NoNestedDataBp", "Processor ignores nested data breakpoints") is supported by the SOC as otherwise a malicious SEV-ES guest can set up data breakpoints on the #VC IDT entry/stack and cause an infinite loop. Eliminate DR7 and #DB intercepts as: - they are not needed when DebugSwap is supported; - #VC for these intercepts is most likely not supported anyway and kills the VM. Keep DR7 intercepted unless DebugSwap enabled to prevent the infinite #DB loop DoS. While at this, move set_/clr_dr_intercepts to .c and move #DB intercept next to DR7 intercept. Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxx> --- Changes: v4: * removed sev_es_is_debug_swap_enabled() helper * made sev_es_debug_swap_enabled (module param) static * set sev_feature early in sev_es_init_vmcb() and made intercepts dependend on it vs. module param * move set_/clr_dr_intercepts to .c v3: * rewrote the commit log again * rebased on tip/master to use recently defined X86_FEATURE_NO_NESTED_DATA_BP * s/boot_cpu_has/cpu_feature_enabled/ v2: * debug_swap moved from vcpu to module_param * rewrote commit log --- Tested with: === int x; int main(int argc, char *argv[]) { x = 1; return 0; } === gcc -g a.c rsync a.out ruby-954vm:~/ ssh -t ruby-954vm 'gdb -ex "file a.out" -ex "watch x" -ex r' where ruby-954vm is a VM. With "/sys/module/kvm_amd/parameters/debug_swap = 0", gdb does not stop on the watchpoint, with "= 1" - gdb does. --- arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm/svm.h | 42 ------------- arch/x86/kvm/svm/sev.c | 24 ++++++++ arch/x86/kvm/svm/svm.c | 65 +++++++++++++++++++- 4 files changed, 87 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index cb1ee53ad3b1..665515c7edae 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -278,6 +278,7 @@ enum avic_ipi_failure_cause { #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5) struct vmcb_seg { u16 selector; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 4826e6cc611b..653fd09929df 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -389,48 +389,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3 return test_bit(bit, (unsigned long *)&control->intercepts); } -static inline void set_dr_intercepts(struct vcpu_svm *svm) -{ - struct vmcb *vmcb = svm->vmcb01.ptr; - - if (!sev_es_guest(svm->vcpu.kvm)) { - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); - } - - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); - - recalc_intercepts(svm); -} - -static inline void clr_dr_intercepts(struct vcpu_svm *svm) -{ - struct vmcb *vmcb = svm->vmcb01.ptr; - - vmcb->control.intercepts[INTERCEPT_DR] = 0; - - /* DR7 access must remain intercepted for an SEV-ES guest */ - if (sev_es_guest(svm->vcpu.kvm)) { - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); - } - - recalc_intercepts(svm); -} - static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit) { struct vmcb *vmcb = svm->vmcb01.ptr; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 86d6897f4806..af775410c5eb 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -21,6 +21,7 @@ #include <asm/pkru.h> #include <asm/trapnr.h> #include <asm/fpu/xcr.h> +#include <asm/debugreg.h> #include "mmu.h" #include "x86.h" @@ -52,9 +53,14 @@ module_param_named(sev, sev_enabled, bool, 0444); /* enable/disable SEV-ES support */ static bool sev_es_enabled = true; module_param_named(sev_es, sev_es_enabled, bool, 0444); + +/* enable/disable SEV-ES DebugSwap support */ +static bool sev_es_debug_swap_enabled = true; +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644); #else #define sev_enabled false #define sev_es_enabled false +#define sev_es_debug_swap false #endif /* CONFIG_KVM_AMD_SEV */ static u8 sev_enc_bit; @@ -2249,6 +2255,8 @@ void __init sev_hardware_setup(void) out: sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; + if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) + sev_es_debug_swap_enabled = false; #endif } @@ -2940,6 +2948,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) static void sev_es_init_vmcb(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; + struct sev_es_save_area *save = svm->sev_es.vmsa; svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; @@ -2988,6 +2997,9 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP)) svm_clr_intercept(svm, INTERCEPT_RDTSCP); } + + if (sev_es_debug_swap_enabled) + save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; } void sev_init_vmcb(struct vcpu_svm *svm) @@ -3027,6 +3039,18 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */ hostsa->xss = host_xss; + + /* The DebugSwap SEV feature does Type B swaps of DR[0-3] */ + if (sev_es_debug_swap_enabled) { + hostsa->dr0 = native_get_debugreg(0); + hostsa->dr1 = native_get_debugreg(1); + hostsa->dr2 = native_get_debugreg(2); + hostsa->dr3 = native_get_debugreg(3); + hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); + hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); + hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); + hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3); + } } void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 60c7c880266b..f8e222bee22a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -671,6 +671,65 @@ static int svm_cpu_init(int cpu) } +static void set_dr_intercepts(struct vcpu_svm *svm) +{ + struct vmcb *vmcb = svm->vmcb01.ptr; + bool intercept; + + if (!sev_es_guest(svm->vcpu.kvm)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); + } + + if (sev_es_guest(svm->vcpu.kvm)) { + struct sev_es_save_area *save = svm->sev_es.vmsa; + + intercept = !(save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP); + } else { + intercept = true; + } + + if (intercept) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + set_exception_intercept(svm, DB_VECTOR); + } + + recalc_intercepts(svm); +} + +static void clr_dr_intercepts(struct vcpu_svm *svm) +{ + struct vmcb *vmcb = svm->vmcb01.ptr; + struct sev_es_save_area *save = svm->sev_es.vmsa; + + vmcb->control.intercepts[INTERCEPT_DR] = 0; + + /* + * DR7 access must remain intercepted for an SEV-ES guest unless DebugSwap + * (depends on NO_NESTED_DATA_BP) is enabled as otherwise a VM writing to DR7 + * from the #DB handler may trigger infinite loop of #DB's. + */ + if (sev_es_guest(svm->vcpu.kvm) && (save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + } + + recalc_intercepts(svm); +} + static int direct_access_msr_slot(u32 msr) { u32 i; @@ -1184,13 +1243,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu) if (!kvm_vcpu_apicv_active(vcpu)) svm_set_intercept(svm, INTERCEPT_CR8_WRITE); - set_dr_intercepts(svm); - set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, AC_VECTOR); - set_exception_intercept(svm, DB_VECTOR); + /* * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. @@ -1308,6 +1365,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) if (sev_guest(vcpu->kvm)) sev_init_vmcb(svm); + set_dr_intercepts(svm); + svm_hv_init_vmcb(vmcb); init_vmcb_after_set_cpuid(vcpu); -- 2.39.1