On Tue, Jan 09, 2018 at 07:04:04PM +0000, Suzuki K Poulose wrote:
> We allocate the entry level page tables for stage2 when the
> VM is created. This doesn't give us the flexibility of configuring
> the physical address space size for a VM. In order to allow
> the VM to choose the required size, we delay the allocation of
> stage2 entry level tables until we really try to map something.
>
> This could be either when the VM creates a memory range or when
> it tries to map device memory. So we add a hook in these
> two places to make sure the tables are allocated. We use
> kvm->slots_lock to serialize the allocation entry point, since
> we hook into arch-specific callbacks that are invoked with the
> mutex held.
>
> Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
> Cc: Christoffer Dall <cdall@xxxxxxxxxx>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
> ---
>  virt/kvm/arm/arm.c | 18 ++++++----------
>  virt/kvm/arm/mmu.c | 61 +++++++++++++++++++++++++++++++++++++++++++++---------
>  2 files changed, 57 insertions(+), 22 deletions(-)
>
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 19b720ddedce..d06f00566664 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -127,13 +127,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	for_each_possible_cpu(cpu)
>  		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
>
> -	ret = kvm_alloc_stage2_pgd(kvm);
> -	if (ret)
> -		goto out_fail_alloc;
> -
>  	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
> -	if (ret)
> -		goto out_free_stage2_pgd;
> +	if (ret) {
> +		free_percpu(kvm->arch.last_vcpu_ran);
> +		kvm->arch.last_vcpu_ran = NULL;
> +		return ret;
> +	}
> +
>
>  	kvm_vgic_early_init(kvm);
>
> @@ -145,12 +145,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  		kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>
>  	return ret;
> -out_free_stage2_pgd:
> -	kvm_free_stage2_pgd(kvm);
> -out_fail_alloc:
> -	free_percpu(kvm->arch.last_vcpu_ran);
> -	kvm->arch.last_vcpu_ran = NULL;
> -	return ret;
>  }
>
>  bool kvm_arch_has_vcpu_debugfs(void)
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index c94c61ac38b9..257f2a8ccfc7 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -1011,15 +1011,39 @@ static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
>  	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
>  }
>
> -/**
> - * kvm_phys_addr_ioremap - map a device range to guest IPA
> - *
> - * @kvm:	The KVM pointer
> - * @guest_ipa:	The IPA at which to insert the mapping
> - * @pa:		The physical address of the device
> - * @size:	The size of the mapping
> +/*
> + * Finalise the stage2 page table layout. Must be called with kvm->slots_lock
> + * held.
>   */
> -int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
> +static int __kvm_init_stage2_table(struct kvm *kvm)
> +{
> +	/* Double check if somebody has already allocated it */

Dubious comment: either leave it out or explain that we need to check
again with the mutex held.

> +	if (likely(kvm->arch.pgd))
> +		return 0;
> +	return kvm_alloc_stage2_pgd(kvm);
> +}
> +
> +static int kvm_init_stage2_table(struct kvm *kvm)
> +{
> +	int rc;
> +
> +	/*
> +	 * Once allocated, the stage2 entry level tables are only
> +	 * freed when the KVM instance is destroyed. So, if we see
> +	 * something valid here, that guarantees that we have
> +	 * done the one time allocation and it is something valid
> +	 * and won't go away until the last reference to the KVM
> +	 * is gone.
> +	 */

Really not sure this comment adds anything beyond what the code
already describes.
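
If you want to keep comments here at all, I'd rather they spell out the
double-checked locking: why it is safe to look at kvm->arch.pgd outside
the lock, and why we must look again with it held. Something along
these lines, maybe (untested sketch, comment wording is just a
suggestion):

	static int kvm_init_stage2_table(struct kvm *kvm)
	{
		int rc;

		/*
		 * Fast path: once allocated, the pgd is only freed on
		 * VM destruction, so a non-NULL value is stable and we
		 * can return without taking the lock.
		 */
		if (likely(kvm->arch.pgd))
			return 0;

		mutex_lock(&kvm->slots_lock);
		/*
		 * Recheck under slots_lock: we may have raced with
		 * another path doing the one-time allocation.
		 */
		rc = __kvm_init_stage2_table(kvm);
		mutex_unlock(&kvm->slots_lock);
		return rc;
	}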
Thanks,
-Christoffer

> +	if (likely(kvm->arch.pgd))
> +		return 0;
> +	mutex_lock(&kvm->slots_lock);
> +	rc = __kvm_init_stage2_table(kvm);
> +	mutex_unlock(&kvm->slots_lock);
> +	return rc;
> +}
> +
> +static int __kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
>  			  phys_addr_t pa, unsigned long size, bool writable)
>  {
>  	phys_addr_t addr, end;
> @@ -1055,6 +1079,23 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
>  	return ret;
>  }
>
> +/**
> + * kvm_phys_addr_ioremap - map a device range to guest IPA.
> + * Acquires kvm->slots_lock for making sure that the stage2 is initialized.
> + *
> + * @kvm:	The KVM pointer
> + * @guest_ipa:	The IPA at which to insert the mapping
> + * @pa:		The physical address of the device
> + * @size:	The size of the mapping
> + */
> +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
> +			  phys_addr_t pa, unsigned long size, bool writable)
> +{
> +	if (unlikely(kvm_init_stage2_table(kvm)))
> +		return -ENOMEM;
> +	return __kvm_phys_addr_ioremap(kvm, guest_ipa, pa, size, writable);
> +}
> +
>  static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
>  {
>  	kvm_pfn_t pfn = *pfnp;
> @@ -1912,7 +1953,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  			goto out;
>  		}
>
> -		ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
> +		ret = __kvm_phys_addr_ioremap(kvm, gpa, pa,
>  					    vm_end - vm_start,
>  					    writable);
>  		if (ret)
> @@ -1943,7 +1984,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
>  int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
>  			    unsigned long npages)
>  {
> -	return 0;
> +	return __kvm_init_stage2_table(kvm);
>  }
>
>  void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
> --
> 2.13.6
>
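
One more thought, mostly for the commit message: with this change the
entry level tables are allocated on the first KVM_SET_USER_MEMORY_REGION
(via kvm_arch_create_memslot()) or on the first device mapping, instead
of at KVM_CREATE_VM time, so an allocation failure is now reported
through those paths. For reference, a minimal userspace sequence that
exercises the new memslot path would look roughly like this (sketch
only, error handling and fd setup omitted; add_first_memslot is just an
illustrative name):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* vm_fd comes from ioctl(kvm_fd, KVM_CREATE_VM, 0) as usual */
	static int add_first_memslot(int vm_fd, void *host_mem,
				     __u64 guest_pa, __u64 size)
	{
		struct kvm_userspace_memory_region reg;

		memset(&reg, 0, sizeof(reg));
		reg.slot = 0;
		reg.guest_phys_addr = guest_pa;
		reg.memory_size = size;
		reg.userspace_addr = (unsigned long)host_mem;

		/* With this patch, this now triggers the stage2 pgd allocation */
		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &reg);
	}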