---
 arch/x86/include/asm/kvm-x86-ops.h |   2 +-
 arch/x86/include/asm/kvm_host.h    |   2 +-
 arch/x86/kvm/lapic.h               |   2 +
 arch/x86/kvm/svm/svm.c             | 310 +++++++++--------------------
 arch/x86/kvm/svm/svm.h             |   6 -
 arch/x86/kvm/vmx/main.c            |   2 +-
 arch/x86/kvm/vmx/vmx.c             | 179 ++++-------------
 arch/x86/kvm/vmx/vmx.h             |   9 -
 arch/x86/kvm/vmx/x86_ops.h         |   2 +-
 arch/x86/kvm/x86.c                 |  10 +-
 10 files changed, 147 insertions(+), 377 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 5aff7222e40f..8750fc49434b 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -131,7 +131,7 @@ KVM_X86_OP(check_emulate_instruction)
 KVM_X86_OP(apic_init_signal_blocked)
 KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
 KVM_X86_OP_OPTIONAL(migrate_timers)
-KVM_X86_OP(msr_filter_changed)
+KVM_X86_OP(refresh_msr_intercepts)
 KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e159e44a6a1b..a0854c1dbb3e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1819,7 +1819,7 @@ struct kvm_x86_ops {
 	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
-	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+	void (*refresh_msr_intercepts)(struct kvm_vcpu *vcpu);
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 24add38beaf0..150fcaa8430f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -21,6 +21,8 @@
 #define APIC_BROADCAST			0xFF
 #define X2APIC_BROADCAST		0xFFFFFFFFul
 
+#define X2APIC_MSR(r)			(APIC_BASE_MSR + ((r) >> 4))
+
 enum lapic_mode {
 	LAPIC_MODE_DISABLED = 0,
 	LAPIC_MODE_INVALID = X2APIC_ENABLE,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3813258497e4..0b2a88251f10 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -79,69 +79,6 @@ static uint64_t osvw_len = 4, osvw_status;
 
 static DEFINE_PER_CPU(u64, current_tsc_ratio);
 
-#define X2APIC_MSR(x)	(APIC_BASE_MSR + (x >> 4))
-
-static const u32 direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
-	MSR_STAR,
-	MSR_IA32_SYSENTER_CS,
-	MSR_IA32_SYSENTER_EIP,
-	MSR_IA32_SYSENTER_ESP,
-#ifdef CONFIG_X86_64
-	MSR_GS_BASE,
-	MSR_FS_BASE,
-	MSR_KERNEL_GS_BASE,
-	MSR_LSTAR,
-	MSR_CSTAR,
-	MSR_SYSCALL_MASK,
-#endif
-	MSR_IA32_SPEC_CTRL,
-	MSR_IA32_PRED_CMD,
-	MSR_IA32_FLUSH_CMD,
-	MSR_IA32_DEBUGCTLMSR,
-	MSR_IA32_LASTBRANCHFROMIP,
-	MSR_IA32_LASTBRANCHTOIP,
-	MSR_IA32_LASTINTFROMIP,
-	MSR_IA32_LASTINTTOIP,
-	MSR_IA32_XSS,
-	MSR_EFER,
-	MSR_IA32_CR_PAT,
-	MSR_AMD64_SEV_ES_GHCB,
-	MSR_TSC_AUX,
-	X2APIC_MSR(APIC_ID),
-	X2APIC_MSR(APIC_LVR),
-	X2APIC_MSR(APIC_TASKPRI),
-	X2APIC_MSR(APIC_ARBPRI),
-	X2APIC_MSR(APIC_PROCPRI),
-	X2APIC_MSR(APIC_EOI),
-	X2APIC_MSR(APIC_RRR),
-	X2APIC_MSR(APIC_LDR),
-	X2APIC_MSR(APIC_DFR),
-	X2APIC_MSR(APIC_SPIV),
-	X2APIC_MSR(APIC_ISR),
-	X2APIC_MSR(APIC_TMR),
-	X2APIC_MSR(APIC_IRR),
-	X2APIC_MSR(APIC_ESR),
-	X2APIC_MSR(APIC_ICR),
-	X2APIC_MSR(APIC_ICR2),
-
-	/*
-	 * Note:
-	 * AMD does not virtualize APIC TSC-deadline timer mode, but it is
-	 * emulated by KVM. When setting APIC LVTT (0x832) register bit 18,
-	 * the AVIC hardware would generate GP fault. Therefore, always
-	 * intercept the MSR 0x832, and do not setup direct_access_msr.
-	 */
-	X2APIC_MSR(APIC_LVTTHMR),
-	X2APIC_MSR(APIC_LVTPC),
-	X2APIC_MSR(APIC_LVT0),
-	X2APIC_MSR(APIC_LVT1),
-	X2APIC_MSR(APIC_LVTERR),
-	X2APIC_MSR(APIC_TMICT),
-	X2APIC_MSR(APIC_TMCCT),
-	X2APIC_MSR(APIC_TDCR),
-	MSR_INVALID,
-};
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * pause_filter_count: On processors that support Pause filtering(indicated
@@ -756,18 +693,6 @@ static void clr_dr_intercepts(struct vcpu_svm *svm)
 	recalc_intercepts(svm);
 }
 
-static int direct_access_msr_slot(u32 msr)
-{
-	u32 i;
-
-	for (i = 0; direct_access_msrs[i] != MSR_INVALID; i++) {
-		if (direct_access_msrs[i] == msr)
-			return i;
-	}
-
-	return -ENOENT;
-}
-
 static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 {
 	u8 bit_write;
@@ -831,17 +756,6 @@ BUILD_SVM_MSR_BITMAP_HELPER(svm_clear_msr_bitmap_write, __clear_bit, write)
 void svm_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	int slot;
-
-	slot = direct_access_msr_slot(msr);
-	WARN_ON(slot == -ENOENT);
-	if (slot >= 0) {
-		/* Set the shadow bitmaps to the desired intercept states */
-		if (type & MSR_TYPE_R)
-			__clear_bit(slot, svm->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			__clear_bit(slot, svm->shadow_msr_intercept.write);
-	}
 
 	/*
 	 * Don't disabled interception for the MSR if userspace wants to
@@ -870,17 +784,6 @@ void svm_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 void svm_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	int slot;
-
-	slot = direct_access_msr_slot(msr);
-	WARN_ON(slot == -ENOENT);
-	if (slot >= 0) {
-		/* Set the shadow bitmaps to the desired intercept states */
-		if (type & MSR_TYPE_R)
-			__set_bit(slot, svm->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			__set_bit(slot, svm->shadow_msr_intercept.write);
-	}
 
 	if (type & MSR_TYPE_R)
 		svm_set_msr_bitmap_read(vcpu, msr);
@@ -907,6 +810,20 @@ unsigned long *svm_vcpu_alloc_msrpm(void)
 	return msrpm;
 }
 
+static void svm_refresh_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
+{
+	bool intercept = !(to_svm(vcpu)->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK);
+
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW, intercept);
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW, intercept);
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTFROMIP, MSR_TYPE_RW, intercept);
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTTOIP, MSR_TYPE_RW, intercept);
+
+	if (sev_es_guest(vcpu->kvm))
+		svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept);
+
+}
+
 void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, unsigned long *msrpm)
 {
 	svm_disable_intercept_for_msr(vcpu, MSR_STAR, MSR_TYPE_RW);
@@ -924,8 +841,76 @@ void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, unsigned long *msrpm)
 		svm_disable_intercept_for_msr(vcpu, MSR_AMD64_SEV_ES_GHCB, MSR_TYPE_RW);
 }
 
+static void svm_refresh_msr_intercepts(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
+	if (lbrv)
+		svm_refresh_lbr_msr_intercepts(vcpu);
+
+	if (boot_cpu_has(X86_FEATURE_IBPB) && guest_has_pred_cmd_msr(vcpu))
+		svm_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W);
+
+	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D) && guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+		svm_disable_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
+
+	/*
+	 * If the host supports V_SPEC_CTRL then disable the interception
+	 * of MSR_IA32_SPEC_CTRL.
+	 */
+	if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
+		svm_disable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
+	else
+		svm_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW, !svm->spec_ctrl);
+
+	/*
+	 * Intercept SYSENTER_EIP and SYSENTER_ESP when emulating an Intel CPU,
+	 * as AMD hardware only stores 32 bits, whereas Intel CPUs track 64 bits.
+	 */
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW,
+				  guest_cpuid_is_intel_compatible(vcpu));
+	svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW,
+				  guest_cpuid_is_intel_compatible(vcpu));
+}
+
 void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
 {
+	static const u32 x2avic_passthrough_msrs[] = {
+		X2APIC_MSR(APIC_ID),
+		X2APIC_MSR(APIC_LVR),
+		X2APIC_MSR(APIC_TASKPRI),
+		X2APIC_MSR(APIC_ARBPRI),
+		X2APIC_MSR(APIC_PROCPRI),
+		X2APIC_MSR(APIC_EOI),
+		X2APIC_MSR(APIC_RRR),
+		X2APIC_MSR(APIC_LDR),
+		X2APIC_MSR(APIC_DFR),
+		X2APIC_MSR(APIC_SPIV),
+		X2APIC_MSR(APIC_ISR),
+		X2APIC_MSR(APIC_TMR),
+		X2APIC_MSR(APIC_IRR),
+		X2APIC_MSR(APIC_ESR),
+		X2APIC_MSR(APIC_ICR),
+		X2APIC_MSR(APIC_ICR2),
+
+		/*
+		 * Note:
+		 * AMD does not virtualize APIC TSC-deadline timer mode, but it is
+		 * emulated by KVM. When setting APIC LVTT (0x832) register bit 18,
+		 * the AVIC hardware would generate GP fault. Therefore, always
+		 * intercept the MSR 0x832, and do not setup direct_access_msr.
+		 */
+		X2APIC_MSR(APIC_LVTTHMR),
+		X2APIC_MSR(APIC_LVTPC),
+		X2APIC_MSR(APIC_LVT0),
+		X2APIC_MSR(APIC_LVT1),
+		X2APIC_MSR(APIC_LVTERR),
+		X2APIC_MSR(APIC_TMICT),
+		X2APIC_MSR(APIC_TMCCT),
+		X2APIC_MSR(APIC_TDCR),
+	};
 	int i;
 
 	if (intercept == svm->x2avic_msrs_intercepted)
@@ -934,15 +919,9 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
 	if (!x2avic_enabled)
 		return;
 
-	for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {
-		int index = direct_access_msrs[i];
-
-		if ((index < APIC_BASE_MSR) ||
-		    (index > APIC_BASE_MSR + 0xff))
-			continue;
-
-		svm_set_intercept_for_msr(&svm->vcpu, index, MSR_TYPE_RW, intercept);
-	}
+	for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
+		svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
+					  MSR_TYPE_RW, intercept);
 
 	svm->x2avic_msrs_intercepted = intercept;
 }
@@ -952,73 +931,6 @@ void svm_vcpu_free_msrpm(unsigned long *msrpm)
 	__free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
 }
 
-static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	u32 i;
-
-	/*
-	 * Redo intercept permissions for MSRs that KVM is passing through to
-	 * the guest. Disabling interception will check the new MSR filter and
-	 * ensure that KVM enables interception if usersepace wants to filter
-	 * the MSR. MSRs that KVM is already intercepting don't need to be
-	 * refreshed since KVM is going to intercept them regardless of what
-	 * userspace wants.
-	 */
-	for (i = 0; direct_access_msrs[i] != MSR_INVALID; i++) {
-		u32 msr = direct_access_msrs[i];
-
-		if (!test_bit(i, svm->shadow_msr_intercept.read))
-			svm_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-		if (!test_bit(i, svm->shadow_msr_intercept.write))
-			svm_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
-	}
-}
-
-static void add_msr_offset(u32 offset)
-{
-	int i;
-
-	for (i = 0; i < MSRPM_OFFSETS; ++i) {
-
-		/* Offset already in list? */
-		if (msrpm_offsets[i] == offset)
-			return;
-
-		/* Slot used by another offset? */
-		if (msrpm_offsets[i] != MSR_INVALID)
-			continue;
-
-		/* Add offset to list */
-		msrpm_offsets[i] = offset;
-
-		return;
-	}
-
-	/*
-	 * If this BUG triggers the msrpm_offsets table has an overflow. Just
-	 * increase MSRPM_OFFSETS in this case.
-	 */
-	BUG();
-}
-
-static void init_msrpm_offsets(void)
-{
-	int i;
-
-	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
-
-	for (i = 0; direct_access_msrs[i] != MSR_INVALID; i++) {
-		u32 offset;
-
-		offset = svm_msrpm_offset(direct_access_msrs[i]);
-		BUG_ON(offset == MSR_INVALID);
-
-		add_msr_offset(offset);
-	}
-}
-
 void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
 {
 	to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
@@ -1035,13 +947,7 @@ void svm_enable_lbrv(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
-	svm_disable_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW);
-	svm_disable_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW);
-	svm_disable_intercept_for_msr(vcpu, MSR_IA32_LASTINTFROMIP, MSR_TYPE_RW);
-	svm_disable_intercept_for_msr(vcpu, MSR_IA32_LASTINTTOIP, MSR_TYPE_RW);
-
-	if (sev_es_guest(vcpu->kvm))
-		svm_disable_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW);
+	svm_refresh_lbr_msr_intercepts(vcpu);
 
 	/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
 	if (is_guest_mode(vcpu))
@@ -1053,12 +959,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
-
 	svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
-	svm_enable_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW);
-	svm_enable_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW);
-	svm_enable_intercept_for_msr(vcpu, MSR_IA32_LASTINTFROMIP, MSR_TYPE_RW);
-	svm_enable_intercept_for_msr(vcpu, MSR_IA32_LASTINTTOIP, MSR_TYPE_RW);
+	svm_refresh_lbr_msr_intercepts(vcpu);
 
 	/*
 	 * Move the LBR msrs back to the vmcb01 to avoid copying them
@@ -1241,17 +1143,9 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (guest_cpuid_is_intel_compatible(vcpu)) {
-		/*
-		 * We must intercept SYSENTER_EIP and SYSENTER_ESP
-		 * accesses because the processor only stores 32 bits.
-		 * For the same reason we cannot use virtual VMLOAD/VMSAVE.
-		 */
 		svm_set_intercept(svm, INTERCEPT_VMLOAD);
 		svm_set_intercept(svm, INTERCEPT_VMSAVE);
 		svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
-
-		svm_enable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-		svm_enable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
 	} else {
 		/*
 		 * If hardware supports Virtual VMLOAD VMSAVE then enable it
@@ -1262,9 +1156,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
 			svm_clr_intercept(svm, INTERCEPT_VMSAVE);
 			svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 		}
-		/* No need to intercept these MSRs */
-		svm_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-		svm_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
 	}
 }
 
@@ -1388,13 +1279,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 
 	svm_recalc_instruction_intercepts(vcpu, svm);
 
-	/*
-	 * If the host supports V_SPEC_CTRL then disable the interception
-	 * of MSR_IA32_SPEC_CTRL.
-	 */
-	if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
-		svm_disable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
-
 	if (kvm_vcpu_apicv_active(vcpu))
 		avic_init_vmcb(svm, vmcb);
 
@@ -1422,8 +1306,6 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
 	svm_init_osvw(vcpu);
 
 	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
@@ -1448,6 +1330,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		sev_snp_init_protected_guest_state(vcpu);
 
 	init_vmcb(vcpu);
+	svm_refresh_msr_intercepts(vcpu);
 
 	if (!init_event)
 		__svm_vcpu_reset(vcpu);
@@ -1488,10 +1371,6 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
 	if (err)
 		goto error_free_vmsa_page;
 
-	/* All MSRs start out in the "intercepted" state. */
-	bitmap_fill(svm->shadow_msr_intercept.read, MAX_DIRECT_ACCESS_MSRS);
-	bitmap_fill(svm->shadow_msr_intercept.write, MAX_DIRECT_ACCESS_MSRS);
-
 	svm->msrpm = svm_vcpu_alloc_msrpm();
 	if (!svm->msrpm) {
 		err = -ENOMEM;
@@ -3193,8 +3072,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
 		/*
 		 * TSC_AUX is usually changed only during boot and never read
-		 * directly. Intercept TSC_AUX instead of exposing it to the
-		 * guest via direct_access_msrs, and switch it via user return.
+		 * directly. Intercept TSC_AUX and switch it via user return.
 		 */
 		preempt_disable();
 		ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
@@ -4465,12 +4343,6 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	svm_recalc_instruction_intercepts(vcpu, svm);
 
-	if (boot_cpu_has(X86_FEATURE_IBPB) && guest_has_pred_cmd_msr(vcpu))
-		svm_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W);
-
-	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D) && guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
-		svm_disable_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
-
 	if (sev_guest(vcpu->kvm))
 		sev_vcpu_after_set_cpuid(svm);
 
@@ -5166,7 +5038,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
-	.msr_filter_changed = svm_msr_filter_changed,
+	.refresh_msr_intercepts = svm_refresh_msr_intercepts,
 	.complete_emulated_msr = svm_complete_emulated_msr,
 
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
@@ -5324,8 +5196,6 @@ static __init int svm_hardware_setup(void)
 	memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
 	iopm_base = __sme_page_pa(iopm_pages);
 
-	init_msrpm_offsets();
-
 	kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
 				     XFEATURE_MASK_BNDCSR);
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 2513990c5b6e..a73da8ca73b4 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -313,12 +313,6 @@ struct vcpu_svm {
 	struct list_head ir_list;
 	spinlock_t ir_list_lock;
 
-	/* Save desired MSR intercept (read: pass-through) state */
-	struct {
-		DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
-		DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
-	} shadow_msr_intercept;
-
 	struct vcpu_sev_es_state sev_es;
 
 	bool guest_state_loaded;
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 92d35cc6cd15..915df0f5f1eb 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -152,7 +152,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
 
 	.migrate_timers = vmx_migrate_timers,
-	.msr_filter_changed = vmx_msr_filter_changed,
+	.refresh_msr_intercepts = vmx_refresh_msr_intercepts,
 	.complete_emulated_msr = kvm_complete_insn_gp,
 
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0701bf32e59e..88f71b66e673 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -163,31 +163,6 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 	RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
 	RTIT_STATUS_BYTECNT))
 
-/*
- * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic, PT and LBR MSRs are handled specially.
- */
-static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
-	MSR_IA32_SPEC_CTRL,
-	MSR_IA32_PRED_CMD,
-	MSR_IA32_FLUSH_CMD,
-	MSR_IA32_TSC,
-#ifdef CONFIG_X86_64
-	MSR_FS_BASE,
-	MSR_GS_BASE,
-	MSR_KERNEL_GS_BASE,
-	MSR_IA32_XFD,
-	MSR_IA32_XFD_ERR,
-#endif
-	MSR_IA32_SYSENTER_CS,
-	MSR_IA32_SYSENTER_ESP,
-	MSR_IA32_SYSENTER_EIP,
-	MSR_CORE_C1_RES,
-	MSR_CORE_C3_RESIDENCY,
-	MSR_CORE_C6_RESIDENCY,
-	MSR_CORE_C7_RESIDENCY,
-};
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap: upper bound on the amount of time between two successive
@@ -669,40 +644,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 	return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
-static int vmx_get_passthrough_msr_slot(u32 msr)
-{
-	int i;
-
-	switch (msr) {
-	case 0x800 ... 0x8ff:
-		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-		return -ENOENT;
-	case MSR_IA32_RTIT_STATUS:
-	case MSR_IA32_RTIT_OUTPUT_BASE:
-	case MSR_IA32_RTIT_OUTPUT_MASK:
-	case MSR_IA32_RTIT_CR3_MATCH:
-	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
-		/* PT MSRs. These are handled in pt_update_intercept_for_msr() */
-	case MSR_LBR_SELECT:
-	case MSR_LBR_TOS:
-	case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
-	case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
-	case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
-	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
-	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
-		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-		return -ENOENT;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		if (vmx_possible_passthrough_msrs[i] == msr)
-			return i;
-	}
-
-	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
-	return -ENOENT;
-}
-
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -4002,25 +3943,12 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	int idx;
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
 	vmx_msr_bitmap_l01_changed(vmx);
 
-	/*
-	 * Mark the desired intercept state in shadow bitmap, this is needed
-	 * for resync when the MSR filters change.
-	 */
-	idx = vmx_get_passthrough_msr_slot(msr);
-	if (idx >= 0) {
-		if (type & MSR_TYPE_R)
-			__clear_bit(idx, vmx->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			__clear_bit(idx, vmx->shadow_msr_intercept.write);
-	}
-
 	if ((type & MSR_TYPE_R) &&
 	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
 		vmx_set_msr_bitmap_read(msr_bitmap, msr);
@@ -4044,25 +3972,12 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	int idx;
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
 	vmx_msr_bitmap_l01_changed(vmx);
 
-	/*
-	 * Mark the desired intercept state in shadow bitmap, this is needed
-	 * for resync when the MSR filter changes.
-	 */
-	idx = vmx_get_passthrough_msr_slot(msr);
-	if (idx >= 0) {
-		if (type & MSR_TYPE_R)
-			__set_bit(idx, vmx->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			__set_bit(idx, vmx->shadow_msr_intercept.write);
-	}
-
 	if (type & MSR_TYPE_R)
 		vmx_set_msr_bitmap_read(msr_bitmap, msr);
 
@@ -4146,35 +4061,54 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 	}
 }
 
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+void vmx_refresh_msr_intercepts(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 i;
-
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
-	/*
-	 * Redo intercept permissions for MSRs that KVM is passing through to
-	 * the guest. Disabling interception will check the new MSR filter and
-	 * ensure that KVM enables interception if usersepace wants to filter
-	 * the MSR. MSRs that KVM is already intercepting don't need to be
-	 * refreshed since KVM is going to intercept them regardless of what
-	 * userspace wants.
-	 */
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		u32 msr = vmx_possible_passthrough_msrs[i];
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.read))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.write))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
+#ifdef CONFIG_X86_64
+	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+#endif
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+	if (kvm_cstate_in_guest(vcpu->kvm)) {
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
 	}
 
 	/* PT MSRs can be passed through iff PT is exposed to the guest. */
 	if (vmx_pt_mode_is_host_guest())
 		pt_update_intercept_for_msr(vcpu);
+
+	if (vcpu->arch.xfd_no_write_intercept)
+		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, MSR_TYPE_RW);
+
+
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
+				  !to_vmx(vcpu)->spec_ctrl);
+
+	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
+					  !guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+
+	if (boot_cpu_has(X86_FEATURE_IBPB))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+					  !guest_has_pred_cmd_msr(vcpu));
+
+	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+					  !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
+	/*
+	 * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
+	 * filtered by userspace.
+	 */
 }
 
 static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -7566,26 +7500,6 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 		evmcs->hv_enlightenments_control.msr_bitmap = 1;
 	}
 
-	/* The MSR bitmap starts with all ones */
-	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
-#ifdef CONFIG_X86_64
-	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-#endif
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-	if (kvm_cstate_in_guest(vcpu->kvm)) {
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
-	}
-
 	vmx->loaded_vmcs = &vmx->vmcs01;
 
 	if (cpu_need_virtualize_apic_accesses(vcpu)) {
@@ -7866,18 +7780,6 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
-					  !guest_cpuid_has(vcpu, X86_FEATURE_XFD));
-
-	if (boot_cpu_has(X86_FEATURE_IBPB))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
-					  !guest_has_pred_cmd_msr(vcpu));
-
-	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
-					  !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
-
 	set_cr4_guest_host_mask(vmx);
 
 	vmx_write_encls_bitmap(vcpu, NULL);
@@ -7893,6 +7795,9 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		vmx->msr_ia32_feature_control_valid_bits &=
 			~FEAT_CTL_SGX_LC_ENABLED;
 
+	/* Refresh MSR interception to account for feature changes. */
+	vmx_refresh_msr_intercepts(vcpu);
+
 	/* Refresh #PF interception to account for MAXPHYADDR changes. */
 	vmx_update_exception_bitmap(vcpu);
 }
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 43f573f6ca46..d38f39935a52 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -17,8 +17,6 @@
 #include "run_flags.h"
 #include "../mmu.h"
 
-#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
-
 #ifdef CONFIG_X86_64
 #define MAX_NR_USER_RETURN_MSRS	7
 #else
@@ -353,13 +351,6 @@ struct vcpu_vmx {
 	struct pt_desc pt_desc;
 	struct lbr_desc lbr_desc;
 
-	/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS	16
-	struct {
-		DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-		DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-	} shadow_msr_intercept;
-
 	/* ve_info must be page aligned. */
 	struct vmx_ve_information *ve_info;
 };
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index a55981c5216e..ee16bbdd9a3e 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -54,7 +54,7 @@ void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
 			   int trig_mode, int vector);
 void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
 bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu);
+void vmx_refresh_msr_intercepts(struct kvm_vcpu *vcpu);
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
 int vmx_get_feature_msr(u32 msr, u64 *data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2e713480933a..5d4e049e5725 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10840,8 +10840,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_vcpu_update_apicv(vcpu);
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
+
+		/*
+		 * Refresh intercept permissions for MSRs that KVM is passing
+		 * through to the guest, as userspace may want to trap accesses.
+		 * Disabling interception will check the new MSR filter and
+		 * ensure that KVM enables interception if userspace wants to
+		 * filter the MSR.
+		 */
 		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
-			kvm_x86_call(msr_filter_changed)(vcpu);
+			kvm_x86_call(refresh_msr_intercepts)(vcpu);
 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
 			kvm_x86_call(update_cpu_dirty_logging)(vcpu);
 

base-commit: c109f5c273abb98684209280d4b07d596ee6a54a
-- 
2.47.0.338.g60cca15819-goog