AMD Milan (Fam 19h) introduces support for swapping, as type 'B', the DR[0-3] and DR[0-3]_ADDR_MASK registers. Software enables this by setting SEV_FEATURES[5] (called "DebugSwap") in the VMSA, which makes data breakpoints work in SEV-ES VMs. For type 'B' swaps the hardware saves/restores the VM state on VMEXIT/VMRUN in the VMSA, and restores the host state on VMEXIT. Enable DebugSwap in the VMSA, but only if CPUID Fn80000021_EAX[0] ("NoNestedDataBp", "Processor ignores nested data breakpoints") is supported by the SoC, as otherwise a malicious guest can cause an infinite #DB loop DoS. Save DR[0-3] / DR[0-3]_ADDR_MASK in the host save area before VMRUN as a type 'B' swap does not do this part. Eliminate DR7 and #DB intercepts as: - they are not needed when DebugSwap is supported; - #VC for these intercepts is most likely not supported anyway and kills the VM. Keep DR7 intercepted unless DebugSwap is enabled, to prevent the infinite #DB loop DoS. Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxx> --- Changes: v2: * debug_swap moved from vcpu to module_param * rewrote commit log --- "DR7 access must remain intercepted for an SEV-ES guest" - I could not figure out the exact reasoning why it is there in the first place; IIUC this is to prevent a loop of #DBs in the VM. --- Tested with: === int x; int main(int argc, char *argv[]) { x = 1; return 0; } === gcc -g a.c rsync a.out ruby-954vm:~/ ssh -t ruby-954vm 'gdb -ex "file a.out" -ex "watch x" -ex r' where ruby-954vm is a VM. With "/sys/module/kvm_amd/parameters/debug_swap = 0", gdb does not stop on the watchpoint; with "= 1", gdb does. 
--- arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm/svm.h | 16 ++++++++--- arch/x86/kvm/svm/sev.c | 29 ++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 3 +- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0361626841bc..373a0edda588 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -273,6 +273,7 @@ enum avic_ipi_failure_cause { #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5) struct vmcb_seg { u16 selector; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 199a2ecef1ce..0fae611abe4a 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -386,6 +386,8 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3 return test_bit(bit, (unsigned long *)&control->intercepts); } +extern bool sev_es_is_debug_swap_enabled(void); + static inline void set_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb01.ptr; @@ -407,8 +409,10 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm) vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); } - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + if (!sev_es_guest(svm->vcpu.kvm) || !sev_es_is_debug_swap_enabled()) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + } recalc_intercepts(svm); } @@ -419,8 +423,12 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm) vmcb->control.intercepts[INTERCEPT_DR] = 0; - /* DR7 access must remain intercepted for an SEV-ES guest */ - if (sev_es_guest(svm->vcpu.kvm)) { + /* + * DR7 access must remain intercepted for an SEV-ES guest unless DebugSwap + * (depends on NO_NESTED_DATA_BP) is enabled as otherwise a VM writing to DR7 + * from the #DB handler may trigger an infinite loop of #DBs. 
+ */ + if (sev_es_guest(svm->vcpu.kvm) && !sev_es_is_debug_swap_enabled()) { vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index efaaef2b7ae1..800ea2a778cc 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -21,6 +21,7 @@ #include <asm/pkru.h> #include <asm/trapnr.h> #include <asm/fpu/xcr.h> +#include <asm/debugreg.h> #include "mmu.h" #include "x86.h" @@ -52,11 +53,21 @@ module_param_named(sev, sev_enabled, bool, 0444); /* enable/disable SEV-ES support */ static bool sev_es_enabled = true; module_param_named(sev_es, sev_es_enabled, bool, 0444); + +/* enable/disable SEV-ES DebugSwap support */ +static bool sev_es_debug_swap_enabled = true; +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644); #else #define sev_enabled false #define sev_es_enabled false +#define sev_es_debug_swap_enabled false #endif /* CONFIG_KVM_AMD_SEV */ +bool sev_es_is_debug_swap_enabled(void) +{ + return sev_es_debug_swap_enabled; +} + static u8 sev_enc_bit; static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); @@ -604,6 +615,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; + if (sev_es_is_debug_swap_enabled()) + save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; + pr_debug("Virtual Machine Save Area (VMSA):\n"); print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); @@ -2249,6 +2263,9 @@ void __init sev_hardware_setup(void) out: sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; + if (sev_es_debug_swap_enabled) + sev_es_debug_swap_enabled = sev_es_enabled && + boot_cpu_has(X86_FEATURE_NO_NESTED_DATA_BP); #endif } @@ -3027,6 +3044,18 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */ hostsa->xss = host_xss; + + /* The 
DebugSwap SEV feature does Type B swaps of DR[0-3] */ + if (sev_es_is_debug_swap_enabled()) { + hostsa->dr0 = native_get_debugreg(0); + hostsa->dr1 = native_get_debugreg(1); + hostsa->dr2 = native_get_debugreg(2); + hostsa->dr3 = native_get_debugreg(3); + hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); + hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); + hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); + hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3); + } } void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ce362e88a567..697804d46545 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1189,7 +1189,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, AC_VECTOR); - set_exception_intercept(svm, DB_VECTOR); + if (!sev_es_guest(vcpu->kvm) || !sev_es_is_debug_swap_enabled()) + set_exception_intercept(svm, DB_VECTOR); /* * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. -- 2.38.1