From: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx> The guest DR7 is loaded before VM enter to enable debugging functions for the guest. If guest debugging is not enabled, the #DB and #BP exceptions are reinjected into the guest directly; otherwise, they are handled by the hypervisor, similar to VMX. However, DR7_GD is cleared since debug register read/write is a privileged instruction, which always leads to a VM exit for #GP. The address of breakpoints is limited to the allowed address range, similar to the check in the #PF path. Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx> Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx> --- arch/x86/kvm/pvm/pvm.c | 96 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/pvm/pvm.h | 3 ++ 2 files changed, 99 insertions(+) diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c index 4ec8c2c514ca..299305903005 100644 --- a/arch/x86/kvm/pvm/pvm.c +++ b/arch/x86/kvm/pvm/pvm.c @@ -383,6 +383,8 @@ static void pvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_pvm *pvm = to_pvm(vcpu); + pvm->host_debugctlmsr = get_debugctlmsr(); + if (__this_cpu_read(active_pvm_vcpu) == pvm && vcpu->cpu == cpu) return; @@ -533,6 +535,9 @@ static int pvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: msr_info->data = pvm->unused_MSR_IA32_SYSENTER_ESP; break; + case MSR_IA32_DEBUGCTLMSR: + msr_info->data = 0; + break; case MSR_PVM_VCPU_STRUCT: msr_info->data = pvm->msr_vcpu_struct; break; @@ -619,6 +624,9 @@ static int pvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: pvm->unused_MSR_IA32_SYSENTER_ESP = data; break; + case MSR_IA32_DEBUGCTLMSR: + /* It is ignored now. 
*/ + break; case MSR_PVM_VCPU_STRUCT: if (!PAGE_ALIGNED(data)) return 1; @@ -810,6 +818,10 @@ static bool pvm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) return false; } +static void update_exception_bitmap(struct kvm_vcpu *vcpu) +{ +} + static struct pvm_vcpu_struct *pvm_get_vcpu_struct(struct vcpu_pvm *pvm) { struct gfn_to_pfn_cache *gpc = &pvm->pvcs_gpc; @@ -1235,6 +1247,72 @@ static int pvm_vcpu_pre_run(struct kvm_vcpu *vcpu) return 1; } +static void pvm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) +{ + WARN_ONCE(1, "pvm never sets KVM_DEBUGREG_WONT_EXIT\n"); +} + +static void pvm_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) +{ + to_pvm(vcpu)->guest_dr7 = val; +} + +static __always_inline unsigned long __dr7_enable_mask(int drnum) +{ + unsigned long bp_mask = 0; + + bp_mask |= (DR_LOCAL_ENABLE << (drnum * DR_ENABLE_SIZE)); + bp_mask |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); + + return bp_mask; +} + +static __always_inline unsigned long __dr7_mask(int drnum) +{ + unsigned long bp_mask = 0xf; + + bp_mask <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_mask |= __dr7_enable_mask(drnum); + + return bp_mask; +} + +/* + * Calculate the correct dr7 for the hardware to avoid the host + * being watched. + * + * It only needs to be calculated each time when vcpu->arch.eff_db or + * pvm->guest_dr7 is changed. But now it is calculated each time on + * VM-enter since there is no proper callback for vcpu->arch.eff_db and + * it is slow path. + */ +static __always_inline unsigned long pvm_eff_dr7(struct kvm_vcpu *vcpu) +{ + unsigned long eff_dr7 = to_pvm(vcpu)->guest_dr7; + int i; + + /* + * DR7_GD should not be set to hardware. And it doesn't need to be + * set to hardware since PVM guest is running on hardware ring3. + * All access to debug registers will be trapped and the emulation + * code can handle DR7_GD correctly for PVM. 
+ */ + eff_dr7 &= ~DR7_GD; + + /* + * Disallow addresses that are not for the guest, especially addresses + * on the host entry code. + */ + for (i = 0; i < KVM_NR_DB_REGS; i++) { + if (!pvm_guest_allowed_va(vcpu, vcpu->arch.eff_db[i])) + eff_dr7 &= ~__dr7_mask(i); + if (!pvm_guest_allowed_va(vcpu, vcpu->arch.eff_db[i] + 7)) + eff_dr7 &= ~__dr7_mask(i); + } + + return eff_dr7; +} + // Save guest registers from host sp0 or IST stack. static __always_inline void save_regs(struct kvm_vcpu *vcpu, struct pt_regs *guest) { @@ -1301,6 +1379,9 @@ static noinstr void pvm_vcpu_run_noinstr(struct kvm_vcpu *vcpu) // Load guest registers into the host sp0 stack for switcher. load_regs(vcpu, sp0_regs); + if (unlikely(pvm->guest_dr7 & DR7_BP_EN_MASK)) + set_debugreg(pvm_eff_dr7(vcpu), 7); + // Call into switcher and enter guest. ret_regs = switcher_enter_guest(); @@ -1309,6 +1390,11 @@ static noinstr void pvm_vcpu_run_noinstr(struct kvm_vcpu *vcpu) pvm->exit_vector = (ret_regs->orig_ax >> 32); pvm->exit_error_code = (u32)ret_regs->orig_ax; + // dr7 must be zero when control of the debug registers + // passes back to the host. + if (unlikely(pvm->guest_dr7 & DR7_BP_EN_MASK)) + set_debugreg(0, 7); + // handle noinstr vmexits reasons. switch (pvm->exit_vector) { case PF_VECTOR: @@ -1387,8 +1473,15 @@ static fastpath_t pvm_vcpu_run(struct kvm_vcpu *vcpu) pvm_set_host_cr3(pvm); + if (pvm->host_debugctlmsr) + update_debugctlmsr(0); + pvm_vcpu_run_noinstr(vcpu); + /* MSR_IA32_DEBUGCTLMSR is zeroed before vmenter. 
Restore it if needed */ + if (pvm->host_debugctlmsr) + update_debugctlmsr(pvm->host_debugctlmsr); + if (is_smod(pvm)) { struct pvm_vcpu_struct *pvcs = pvm->pvcs_gpc.khva; @@ -1696,6 +1789,7 @@ static struct kvm_x86_ops pvm_x86_ops __initdata = { .vcpu_load = pvm_vcpu_load, .vcpu_put = pvm_vcpu_put, + .update_exception_bitmap = update_exception_bitmap, .get_msr_feature = pvm_get_msr_feature, .get_msr = pvm_get_msr, .set_msr = pvm_set_msr, @@ -1709,6 +1803,8 @@ static struct kvm_x86_ops pvm_x86_ops __initdata = { .set_gdt = pvm_set_gdt, .get_idt = pvm_get_idt, .set_idt = pvm_set_idt, + .set_dr7 = pvm_set_dr7, + .sync_dirty_debug_regs = pvm_sync_dirty_debug_regs, .get_rflags = pvm_get_rflags, .set_rflags = pvm_set_rflags, .get_if_flag = pvm_get_if_flag, diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h index bf3a6a1837c0..4cdcbed1c813 100644 --- a/arch/x86/kvm/pvm/pvm.h +++ b/arch/x86/kvm/pvm/pvm.h @@ -37,6 +37,7 @@ struct vcpu_pvm { unsigned long switch_flags; u16 host_ds_sel, host_es_sel; + u64 host_debugctlmsr; union { unsigned long exit_extra; @@ -52,6 +53,8 @@ struct vcpu_pvm { int int_shadow; bool nmi_mask; + unsigned long guest_dr7; + struct gfn_to_pfn_cache pvcs_gpc; // emulated x86 msrs -- 2.19.1.6.gb485710b