When the MMU is in pure direct mode, it uses a paging structure walk
iterator and does not require the rmap. The rmap requires 8 bytes for
every PTE that could be used to map guest memory; at 8 bytes per 4KiB
page, it is an expensive data structure at ~0.2% of the size of guest
memory. Delay allocating the rmap until the MMU is no longer in pure
direct mode. This could be caused, for example, by the guest launching
a nested, L2 VM.

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
 arch/x86/kvm/mmu.c | 15 ++++++++++
 arch/x86/kvm/x86.c | 72 ++++++++++++++++++++++++++++++++++++++++++----
 arch/x86/kvm/x86.h |  2 ++
 3 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e0f35da0d1027..72c2289132c43 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5228,8 +5228,23 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	u64 pdptr, pm_mask;
 	gfn_t root_gfn, root_cr3;
 	int i;
+	int r;
 
 	write_lock(&vcpu->kvm->mmu_lock);
+	if (vcpu->kvm->arch.pure_direct_mmu) {
+		write_unlock(&vcpu->kvm->mmu_lock);
+		/*
+		 * If this is the first time a VCPU has allocated shadow roots
+		 * and the direct MMU is enabled on this VM, it will need to
+		 * allocate rmaps for all its memslots. If the rmaps are already
+		 * allocated, this call will have no effect.
+		 */
+		r = kvm_allocate_rmaps(vcpu->kvm);
+		if (r < 0)
+			return r;
+		write_lock(&vcpu->kvm->mmu_lock);
+	}
+
 	vcpu->kvm->arch.pure_direct_mmu = false;
 
 	write_unlock(&vcpu->kvm->mmu_lock);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index edd7d7bece2fe..566521f956425 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9615,14 +9615,21 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 	kvm_page_track_free_memslot(free, dont);
 }
 
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
+static int allocate_memslot_rmap(struct kvm *kvm,
+				 struct kvm_memory_slot *slot,
+				 unsigned long npages)
 {
 	int i;
 
+	/*
+	 * rmaps are allocated all-or-nothing under the slots
+	 * lock, so we only need to check that the first rmap
+	 * has been allocated.
+	 */
+	if (slot->arch.rmap[0])
+		return 0;
+
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-		struct kvm_lpage_info *linfo;
-		unsigned long ugfn;
 		int lpages;
 		int level = i + 1;
 
@@ -9634,8 +9641,61 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 				 GFP_KERNEL_ACCOUNT);
 		if (!slot->arch.rmap[i])
 			goto out_free;
-		if (i == 0)
-			continue;
+	}
+	return 0;
+
+out_free:
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		kvfree(slot->arch.rmap[i]);
+		slot->arch.rmap[i] = NULL;
+	}
+	return -ENOMEM;
+}
+
+int kvm_allocate_rmaps(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	int r = 0;
+	int i;
+
+	mutex_lock(&kvm->slots_lock);
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(slot, slots) {
+			r = allocate_memslot_rmap(kvm, slot, slot->npages);
+			if (r < 0)
+				break;
+		}
+	}
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	int i;
+	int r;
+
+	/* Set the rmap pointer for each level to NULL */
+	memset(slot->arch.rmap, 0,
+	       ARRAY_SIZE(slot->arch.rmap) * sizeof(*slot->arch.rmap));
+	if (!kvm->arch.pure_direct_mmu) {
+		r = allocate_memslot_rmap(kvm, slot, npages);
+		if (r < 0)
+			return r;
+	}
+
+	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
+		struct kvm_lpage_info *linfo;
+		unsigned long ugfn;
+		int lpages;
+		int level = i + 1;
+
+		lpages = gfn_to_index(slot->base_gfn + npages - 1,
+				      slot->base_gfn, level) + 1;
+
 		linfo = kvcalloc(lpages, sizeof(*linfo),
 				 GFP_KERNEL_ACCOUNT);
 		if (!linfo)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index dbf7442a822b6..91bfbfd2c58d4 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -369,4 +369,6 @@ static inline bool kvm_pat_valid(u64 data)
 void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
 void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
 
+int kvm_allocate_rmaps(struct kvm *kvm);
+
 #endif
-- 
2.23.0.444.g18eeb5a265-goog