An ASI restricted address space is added for KVM. It is currently only
enabled for Intel CPUs.

This change incorporates an extra asi_exit() call at the end of
vcpu_run. We expect later iterations of ASI to drop that call as we
gain the ability to context switch within the ASI domain.

Signed-off-by: Brendan Jackman <jackmanb@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/svm/svm.c          |  2 ++
 arch/x86/kvm/vmx/vmx.c          | 36 ++++++++++++++++++++++--------------
 arch/x86/kvm/x86.c              | 29 +++++++++++++++++++++++++++--
 4 files changed, 54 insertions(+), 16 deletions(-)
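
For reviewers, a sketch of how the ASI calls added below are intended to
nest, assuming the asi_enter()/asi_relax()/asi_exit() semantics
introduced earlier in this series (the *_sketch function names are
illustrative stand-ins, not real kernel symbols); a second sketch at the
end of this mail summarises the per-VM setup/teardown ordering:

	/* Innermost run loop: bracket VM-Enter with the restricted space. */
	static noinstr void vcpu_enter_exit_sketch(struct kvm_vcpu *vcpu)
	{
		asi_enter(vcpu->kvm->arch.asi);	/* switch to the restricted CR3 */

		/* ... VM-Enter / VM-Exit happens here ... */

		asi_relax();	/* critical section done; the actual exit is deferred */
	}

	/* Outermost ioctl path: leave the restricted space explicitly. */
	static int vcpu_ioctl_run_sketch(struct kvm_vcpu *vcpu)
	{
		int r = vcpu_run(vcpu);

		/*
		 * ASI cannot yet transition straight from the restricted
		 * address space to the user address space, so drop back to
		 * the unrestricted address space before returning.
		 */
		asi_exit();
		return r;
	}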
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6efd1497b0263..6c3326cb8273c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -36,6 +36,7 @@
 #include <asm/kvm_page_track.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/hyperv-tlfs.h>
+#include <asm/asi.h>
 
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
 
@@ -1514,6 +1515,8 @@ struct kvm_arch {
 	 */
 #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
 	struct kvm_mmu_memory_cache split_desc_cache;
+
+	struct asi *asi;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9aaf83c8d57df..6f9a279c12dc7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4108,6 +4108,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
 	guest_state_enter_irqoff();
 
 	amd_clear_divider();
+	asi_enter(vcpu->kvm->arch.asi);
 
 	if (sev_es_guest(vcpu->kvm))
 		__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
@@ -4115,6 +4116,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
 	else
 		__svm_vcpu_run(svm, spec_ctrl_intercepted);
 
+	asi_relax();
 	guest_state_exit_irqoff();
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 22411f4aff530..1105d666a8ade 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -49,6 +49,7 @@
 #include <asm/mwait.h>
 #include <asm/spec-ctrl.h>
 #include <asm/vmx.h>
+#include <asm/asi.h>
 
 #include <trace/events/ipi.h>
 
@@ -7255,14 +7256,32 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 					unsigned int flags)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long cr3;
 
 	guest_state_enter_irqoff();
+	asi_enter(vcpu->kvm->arch.asi);
+
+	/*
+	 * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
+	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
+	 * it switches back to the current->mm, which can occur in KVM context
+	 * when switching to a temporary mm to patch kernel code, e.g. if KVM
+	 * toggles a static key while handling a VM-Exit.
+	 * Also, this must be done after asi_enter(), as it changes CR3
+	 * when switching address spaces.
+	 */
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		vmx->loaded_vmcs->host_state.cr3 = cr3;
+	}
 
 	/*
 	 * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
 	 * mitigation for MDS is done late in VMentry and is still
 	 * executed in spite of L1D Flush. This is because an extra VERW
 	 * should not matter much after the big hammer L1D Flush.
+	 * This is only done after asi_enter() for performance reasons.
 	 */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
@@ -7283,6 +7302,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 	vmx->idt_vectoring_info = 0;
 
+	asi_relax();
+
 	vmx_enable_fb_clear(vmx);
 
 	if (unlikely(vmx->fail)) {
@@ -7311,7 +7332,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr4;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -7354,19 +7375,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 	vcpu->arch.regs_dirty = 0;
 
-	/*
-	 * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
-	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
-	 * it switches back to the current->mm, which can occur in KVM context
-	 * when switching to a temporary mm to patch kernel code, e.g. if KVM
-	 * toggles a static key while handling a VM-Exit.
-	 */
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91478b769af08..b9947e88d4ac6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -85,6 +85,7 @@
 #include <asm/emulate_prefix.h>
 #include <asm/sgx.h>
 #include <clocksource/hyperv_timer.h>
+#include <asm/asi.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -318,6 +319,8 @@ u64 __read_mostly host_xcr0;
 
 static struct kmem_cache *x86_emulator_cache;
 
+static int __read_mostly kvm_asi_index = -1;
+
 /*
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
@@ -9750,6 +9753,11 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	if (r)
 		goto out_free_percpu;
 
+	r = asi_register_class("KVM", NULL);
+	if (r < 0)
+		goto out_mmu_exit;
+	kvm_asi_index = r;
+
 	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 		kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@@ -9767,7 +9775,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 
 	r = ops->hardware_setup();
 	if (r != 0)
-		goto out_mmu_exit;
+		goto out_asi_unregister;
 
 	kvm_ops_update(ops);
 
@@ -9820,6 +9828,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 out_unwind_ops:
 	kvm_x86_ops.hardware_enable = NULL;
 	static_call(kvm_x86_hardware_unsetup)();
+out_asi_unregister:
+	asi_unregister_class(kvm_asi_index);
 out_mmu_exit:
 	kvm_mmu_vendor_module_exit();
 out_free_percpu:
@@ -9851,6 +9861,7 @@ void kvm_x86_vendor_exit(void)
 	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	static_call(kvm_x86_hardware_unsetup)();
+	asi_unregister_class(kvm_asi_index);
 	kvm_mmu_vendor_module_exit();
 	free_percpu(user_return_msrs);
 	kmem_cache_destroy(x86_emulator_cache);
@@ -11436,6 +11447,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	r = vcpu_run(vcpu);
 
+	/*
+	 * At present ASI doesn't have the capability to transition directly
+	 * from the restricted address space to the user address space. So we
+	 * just return to the unrestricted address space in between.
+	 */
+	asi_exit();
+
 out:
 	kvm_put_guest_fpu(vcpu);
 	if (kvm_run->kvm_valid_regs)
@@ -12539,10 +12557,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm_mmu_init_vm(kvm);
 
-	ret = static_call(kvm_x86_vm_init)(kvm);
+	ret = asi_init(kvm->mm, kvm_asi_index, &kvm->arch.asi);
 	if (ret)
 		goto out_uninit_mmu;
 
+	ret = static_call(kvm_x86_vm_init)(kvm);
+	if (ret)
+		goto out_asi_destroy;
+
 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -12579,6 +12601,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	return 0;
 
+out_asi_destroy:
+	asi_destroy(kvm->arch.asi);
out_uninit_mmu:
 	kvm_mmu_uninit_vm(kvm);
 	kvm_page_track_cleanup(kvm);
@@ -12720,6 +12744,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_destroy_vcpus(kvm);
 	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 	kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
+	asi_destroy(kvm->arch.asi);
 	kvm_mmu_uninit_vm(kvm);
 	kvm_page_track_cleanup(kvm);
 	kvm_xen_destroy_vm(kvm);
-- 
2.45.2.993.g49e7a77208-goog
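
P.S. for reviewers: the per-module and per-VM lifetime pairing the x86.c
hunks implement, sketched under the same assumed ASI API as the note
above (the *_sketch names are illustrative, not real kernel symbols):

	/* Module scope: one ASI class registered for all of KVM. */
	static int vendor_init_sketch(void)
	{
		int r = asi_register_class("KVM", NULL);

		if (r < 0)
			return r;
		kvm_asi_index = r;	/* class index used by every VM */
		return 0;
	}

	static void vendor_exit_sketch(void)
	{
		asi_unregister_class(kvm_asi_index);
	}

	/* VM scope: one restricted address space per VM, unwound in reverse. */
	static int vm_init_sketch(struct kvm *kvm)
	{
		int ret = asi_init(kvm->mm, kvm_asi_index, &kvm->arch.asi);

		if (ret)
			return ret;

		ret = static_call(kvm_x86_vm_init)(kvm);
		if (ret)
			asi_destroy(kvm->arch.asi);	/* undo on failure */
		return ret;
	}

	static void vm_destroy_sketch(struct kvm *kvm)
	{
		asi_destroy(kvm->arch.asi);
	}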