The direct MMU introduces several new fields that need to be initialized
and torn down. Add functions to do that initialization and cleanup.

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  51 ++++++++----
 arch/x86/kvm/mmu.c              | 132 +++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.c              |  16 +++-
 3 files changed, 169 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 23edf56cf577c..1f8164c577d50 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -236,6 +236,22 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING	1
 
+#define HF_GIF_MASK		(1 << 0)
+#define HF_HIF_MASK		(1 << 1)
+#define HF_VINTR_MASK		(1 << 2)
+#define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
+#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
+#define HF_SMM_MASK		(1 << 6)
+#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
+
+#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+#define KVM_ADDRESS_SPACE_NUM 2
+
+#define kvm_arch_vcpu_memslots_id(vcpu) \
+	((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+
 struct kvm_kernel_irq_routing_entry;
 
 /*
@@ -940,6 +956,24 @@ struct kvm_arch {
 	bool exception_payload_enabled;
 
 	struct kvm_pmu_event_filter *pmu_event_filter;
+
+	/*
+	 * Whether the direct MMU is enabled for this VM. This contains a
+	 * snapshot of the direct MMU module parameter from when the VM was
+	 * created and remains unchanged for the life of the VM. If this is
+	 * true, direct MMU handler functions will run for various MMU
+	 * operations.
+	 */
+	bool direct_mmu_enabled;
+	/*
+	 * Indicates that the paging structure built by the direct MMU is
+	 * currently the only one in use. If nesting is used, prompting the
+	 * creation of shadow page tables for L2, this will be set to false.
+	 * While this is true, only direct MMU handlers will be run for many
+	 * MMU functions. Ignored if !direct_mmu_enabled.
+	 */
+	bool pure_direct_mmu;
+	hpa_t direct_root_hpa[KVM_ADDRESS_SPACE_NUM];
 };
 
 struct kvm_vm_stat {
@@ -1255,7 +1289,7 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
@@ -1519,21 +1553,6 @@ enum {
 	TASK_SWITCH_GATE = 3,
 };
 
-#define HF_GIF_MASK		(1 << 0)
-#define HF_HIF_MASK		(1 << 1)
-#define HF_VINTR_MASK		(1 << 2)
-#define HF_NMI_MASK		(1 << 3)
-#define HF_IRET_MASK		(1 << 4)
-#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
-#define HF_SMM_MASK		(1 << 6)
-#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
-
-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
-
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
-
 asmlinkage void kvm_spurious_fault(void);
 
 /*
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 50413f17c7cd0..788edbda02f69 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -47,6 +47,10 @@
 #include <asm/kvm_page_track.h>
 #include "trace.h"
 
+static bool __read_mostly direct_mmu_enabled;
+module_param_named(enable_direct_mmu, direct_mmu_enabled, bool,
+		   S_IRUGO | S_IWUSR);
+
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
  * where the hardware walks 2 page tables:
@@ -3754,27 +3758,56 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	*root_hpa = INVALID_PAGE;
 }
 
+static bool is_direct_mmu_root(struct kvm *kvm, hpa_t root)
+{
+	int as_id;
+
+	for (as_id = 0; as_id < KVM_ADDRESS_SPACE_NUM; as_id++)
+		if (root == kvm->arch.direct_root_hpa[as_id])
+			return true;
+
+	return false;
+}
+
 /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			ulong roots_to_free)
 {
 	int i;
 	LIST_HEAD(invalid_list);
-	bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
 
 	BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
 
-	/* Before acquiring the MMU lock, see if we need to do any real work. */
-	if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
-		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-			if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
-			    VALID_PAGE(mmu->prev_roots[i].hpa))
-				break;
+	/*
+	 * Direct MMU paging structures follow the life of the VM, so instead
+	 * of destroying the direct MMU paging structure root, simply mark the
+	 * root HPA pointing to it as invalid.
+	 */
+	if (vcpu->kvm->arch.direct_mmu_enabled &&
+	    roots_to_free & KVM_MMU_ROOT_CURRENT &&
+	    is_direct_mmu_root(vcpu->kvm, mmu->root_hpa))
+		mmu->root_hpa = INVALID_PAGE;
 
-		if (i == KVM_MMU_NUM_PREV_ROOTS)
-			return;
+	if (!VALID_PAGE(mmu->root_hpa))
+		roots_to_free &= ~KVM_MMU_ROOT_CURRENT;
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) {
+			if (is_direct_mmu_root(vcpu->kvm,
+					       mmu->prev_roots[i].hpa))
+				mmu->prev_roots[i].hpa = INVALID_PAGE;
+			if (!VALID_PAGE(mmu->prev_roots[i].hpa))
+				roots_to_free &= ~KVM_MMU_ROOT_PREVIOUS(i);
+		}
 	}
 
+	/*
+	 * If there are no valid roots that need freeing at this point, avoid
+	 * acquiring the MMU lock and return.
+	 */
+	if (!roots_to_free)
+		return;
+
 	write_lock(&vcpu->kvm->mmu_lock);
 
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
@@ -3782,7 +3815,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
 					   &invalid_list);
 
-	if (free_active_root) {
+	if (roots_to_free & KVM_MMU_ROOT_CURRENT) {
 		if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
 		    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
 			mmu_free_root_page(vcpu->kvm, &mmu->root_hpa,
@@ -3820,7 +3853,12 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 	unsigned i;
 
-	if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
+	if (vcpu->kvm->arch.direct_mmu_enabled) {
+		// TODO: Support 5 level paging in the direct MMU
+		BUG_ON(vcpu->arch.mmu->shadow_root_level > PT64_ROOT_4LEVEL);
+		vcpu->arch.mmu->root_hpa = vcpu->kvm->arch.direct_root_hpa[
+				kvm_arch_vcpu_memslots_id(vcpu)];
+	} else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
 		write_lock(&vcpu->kvm->mmu_lock);
 		if(make_mmu_pages_available(vcpu) < 0) {
 			write_unlock(&vcpu->kvm->mmu_lock);
@@ -3863,6 +3901,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	gfn_t root_gfn, root_cr3;
 	int i;
 
+	write_lock(&vcpu->kvm->mmu_lock);
+	vcpu->kvm->arch.pure_direct_mmu = false;
+	write_unlock(&vcpu->kvm->mmu_lock);
+
 	root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
 	root_gfn = root_cr3 >> PAGE_SHIFT;
 
@@ -5710,6 +5752,64 @@ void kvm_disable_tdp(void)
 }
 EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
+static bool is_direct_mmu_enabled(void)
+{
+	if (!READ_ONCE(direct_mmu_enabled))
+		return false;
+
+	if (WARN_ONCE(!tdp_enabled,
+		      "Creating a VM with direct MMU enabled requires TDP."))
+		return false;
+
+	return true;
+}
+
+static int kvm_mmu_init_direct_mmu(struct kvm *kvm)
+{
+	struct page *page;
+	int i;
+
+	if (!is_direct_mmu_enabled())
+		return 0;
+
+	/*
+	 * Allocate the direct MMU root pages. These pages follow the life of
+	 * the VM.
+	 */
+	for (i = 0; i < ARRAY_SIZE(kvm->arch.direct_root_hpa); i++) {
+		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+		if (!page)
+			goto err;
+		kvm->arch.direct_root_hpa[i] = page_to_phys(page);
+	}
+
+	/* This should not be changed for the lifetime of the VM. */
+	kvm->arch.direct_mmu_enabled = true;
+
+	kvm->arch.pure_direct_mmu = true;
+	return 0;
+err:
+	for (i = 0; i < ARRAY_SIZE(kvm->arch.direct_root_hpa); i++) {
+		if (kvm->arch.direct_root_hpa[i] &&
+		    VALID_PAGE(kvm->arch.direct_root_hpa[i]))
+			free_page((unsigned long)__va(kvm->arch.direct_root_hpa[i]));
+		kvm->arch.direct_root_hpa[i] = INVALID_PAGE;
+	}
+	return -ENOMEM;
+}
+
+static void kvm_mmu_uninit_direct_mmu(struct kvm *kvm)
+{
+	int i;
+
+	if (!kvm->arch.direct_mmu_enabled)
+		return;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+		handle_disconnected_pt(kvm, i, 0,
+			(kvm_pfn_t)(kvm->arch.direct_root_hpa[i] >> PAGE_SHIFT),
+			PT64_ROOT_4LEVEL);
+}
 /* The return value indicates if tlb flush on all vcpus is needed. */
 typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
 
@@ -5956,13 +6056,19 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 	kvm_mmu_zap_all_fast(kvm);
 }
 
-void kvm_mmu_init_vm(struct kvm *kvm)
+int kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+	int r;
+
+	r = kvm_mmu_init_direct_mmu(kvm);
+	if (r)
+		return r;
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
 	kvm_page_track_register_notifier(kvm, node);
+	return 0;
 }
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
@@ -5970,6 +6076,8 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
 	kvm_page_track_unregister_notifier(kvm, node);
+
+	kvm_mmu_uninit_direct_mmu(kvm);
 }
 
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ecf83da396c9..2972b6c6029fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9421,6 +9421,8 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+	int err;
+
 	if (type)
 		return -EINVAL;
 
@@ -9450,9 +9452,19 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm_hv_init_vm(kvm);
 	kvm_page_track_init(kvm);
-	kvm_mmu_init_vm(kvm);
+	err = kvm_mmu_init_vm(kvm);
+	if (err)
+		return err;
+
+	err = kvm_x86_ops->vm_init(kvm);
+	if (err)
+		goto error;
+
+	return 0;
 
-	return kvm_x86_ops->vm_init(kvm);
+error:
+	kvm_mmu_uninit_vm(kvm);
+	return err;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-- 
2.23.0.444.g18eeb5a265-goog
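
As an aside, the roots_to_free pruning in the reworked kvm_mmu_free_roots()
is the subtlest hunk above, so here is a minimal, stand-alone user-space C
sketch of just that logic. The constants, helper names, and sample addresses
below are made-up stand-ins that only mirror the kernel definitions for
illustration; the sketch shows how a root that aliases one of the VM-lifetime
direct MMU roots is merely marked invalid, and how already-invalid roots are
dropped from the mask so the MMU lock can be skipped when nothing real is
left to free.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Made-up stand-ins for the kernel definitions, for illustration only. */
#define INVALID_PAGE		((uint64_t)-1)
#define VALID_PAGE(p)		((p) != INVALID_PAGE)
#define KVM_MMU_NUM_PREV_ROOTS	3
#define KVM_MMU_ROOT_CURRENT	(1UL << 0)
#define KVM_MMU_ROOT_PREVIOUS(i) (1UL << (1 + (i)))

static bool is_direct_root(uint64_t root, const uint64_t *direct_roots, int n)
{
	for (int i = 0; i < n; i++)
		if (root == direct_roots[i])
			return true;
	return false;
}

/*
 * Returns the pruned roots_to_free mask; a zero result means the (simulated)
 * MMU lock would not be taken at all.
 */
static unsigned long prune_roots(unsigned long roots_to_free,
				 uint64_t *root_hpa, uint64_t *prev_roots,
				 const uint64_t *direct_roots, int n_direct)
{
	/* Direct roots live as long as the VM: only mark them invalid. */
	if ((roots_to_free & KVM_MMU_ROOT_CURRENT) &&
	    is_direct_root(*root_hpa, direct_roots, n_direct))
		*root_hpa = INVALID_PAGE;

	if (!VALID_PAGE(*root_hpa))
		roots_to_free &= ~KVM_MMU_ROOT_CURRENT;

	for (int i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
		if (!(roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)))
			continue;
		if (is_direct_root(prev_roots[i], direct_roots, n_direct))
			prev_roots[i] = INVALID_PAGE;
		if (!VALID_PAGE(prev_roots[i]))
			roots_to_free &= ~KVM_MMU_ROOT_PREVIOUS(i);
	}
	return roots_to_free;
}

int main(void)
{
	uint64_t direct_roots[2] = { 0x1000, 0x2000 };	/* one per address space */
	uint64_t root_hpa = 0x1000;			/* currently a direct root */
	uint64_t prev_roots[KVM_MMU_NUM_PREV_ROOTS] = {
		0x3000, INVALID_PAGE, 0x2000 };
	unsigned long mask = KVM_MMU_ROOT_CURRENT | KVM_MMU_ROOT_PREVIOUS(0) |
			     KVM_MMU_ROOT_PREVIOUS(1) | KVM_MMU_ROOT_PREVIOUS(2);

	mask = prune_roots(mask, &root_hpa, prev_roots, direct_roots, 2);
	/* Only prev_roots[0], a real shadow root, still needs freeing: 0x2. */
	printf("remaining mask: %#lx\n", mask);
	return 0;
}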