On Fri, Jun 15, 2012 at 03:08:22PM -0400, Christoffer Dall wrote: > From: Christoffer Dall <cdall@xxxxxxxxxxxxxxx> > > This commit introduces the framework for guest memory management > through the use of 2nd stage translation. Each VM has a pointer > to a level-1 table (the pgd field in struct kvm_arch) which is > used for the 2nd stage translations. Entries are added when handling > guest faults (later patch) and the table itself can be allocated and > freed through the following functions implemented in > arch/arm/kvm/arm_mmu.c: > - kvm_alloc_stage2_pgd(struct kvm *kvm); > - kvm_free_stage2_pgd(struct kvm *kvm); > > Further, each entry in TLBs and caches are tagged with a VMID > identifier in addition to ASIDs. The VMIDs are assigned consecutively > to VMs in the order that VMs are executed, and caches and tlbs are > invalidated when the VMID space has been used to allow for more than > 255 simultaneously running guests. > > The 2nd stage pgd is allocated in kvm_arch_init_vm(). The table is > freed in kvm_arch_destroy_vm(). Both functions are called from the main > KVM code. 
> > Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx> > --- > arch/arm/include/asm/kvm_arm.h | 2 - > arch/arm/include/asm/kvm_mmu.h | 5 ++ > arch/arm/kvm/arm.c | 65 ++++++++++++++++++++++--- > arch/arm/kvm/mmu.c | 103 ++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 166 insertions(+), 9 deletions(-) > > diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h > index 7f30cbd..257242f 100644 > --- a/arch/arm/include/asm/kvm_arm.h > +++ b/arch/arm/include/asm/kvm_arm.h > @@ -62,7 +62,7 @@ > * SWIO: Turn set/way invalidates into set/way clean+invalidate > */ > #define HCR_GUEST_MASK (HCR_TSC | HCR_TWI | HCR_VM | HCR_BSU_IS | HCR_FB | \ > - HCR_AMO | HCR_IMO | HCR_FMO | HCR_FMO | HCR_SWIO) > + HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO) > > /* Hyp System Control Register (HSCTLR) bits */ > #define HSCTLR_TE (1 << 30) > diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h > index 1aa1af4..d95662eb 100644 > --- a/arch/arm/include/asm/kvm_mmu.h > +++ b/arch/arm/include/asm/kvm_mmu.h > @@ -34,4 +34,9 @@ int kvm_hyp_pgd_alloc(void); > pgd_t *kvm_hyp_pgd_get(void); > void kvm_hyp_pgd_free(void); > > +int kvm_alloc_stage2_pgd(struct kvm *kvm); > +void kvm_free_stage2_pgd(struct kvm *kvm); > + > +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); > + > #endif /* __ARM_KVM_MMU_H__ */ > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index efe130c..81babe9 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -38,6 +38,13 @@ > > static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); > > +/* The VMID used in the VTTBR */ > +#define VMID_BITS 8 > +#define VMID_MASK ((1 << VMID_BITS) - 1) > +#define VMID_FIRST_GENERATION (1 << VMID_BITS) > +static u64 next_vmid; /* The next available VMID in the sequence */ > +DEFINE_SPINLOCK(kvm_vmid_lock); > + > int kvm_arch_hardware_enable(void *garbage) > { > return 0; > @@ -70,14 +77,6 @@ void kvm_arch_sync_events(struct 
kvm *kvm) > { > } > > -int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > -{ > - if (type) > - return -EINVAL; > - > - return 0; > -} > - > int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) > { > return VM_FAULT_SIGBUS; > @@ -93,10 +92,46 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) > return 0; > } > > +/** > + * kvm_arch_init_vm - initializes a VM data structure > + * @kvm: pointer to the KVM struct > + */ > +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > +{ > + int ret = 0; > + > + if (type) > + return -EINVAL; > + > + ret = kvm_alloc_stage2_pgd(kvm); > + if (ret) > + goto out_fail_alloc; > + mutex_init(&kvm->arch.pgd_mutex); > + > + ret = create_hyp_mappings(kvm, kvm + 1); > + if (ret) > + goto out_free_stage2_pgd; > + > + /* Mark the initial VMID invalid */ > + kvm->arch.vmid = 0; > + > + return ret; > +out_free_stage2_pgd: > + kvm_free_stage2_pgd(kvm); > +out_fail_alloc: > + return ret; > +} > + > +/** > + * kvm_arch_destroy_vm - destroy the VM data structure > + * @kvm: pointer to the KVM struct > + */ > void kvm_arch_destroy_vm(struct kvm *kvm) > { > int i; > > + kvm_free_stage2_pgd(kvm); > + > for (i = 0; i < KVM_MAX_VCPUS; ++i) { > if (kvm->vcpus[i]) { > kvm_arch_vcpu_free(kvm->vcpus[i]); > @@ -172,6 +207,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) > if (err) > goto free_vcpu; > > + err = create_hyp_mappings(vcpu, vcpu + 1); > + if (err) > + goto free_vcpu; > + > return vcpu; > free_vcpu: > kmem_cache_free(kvm_vcpu_cache, vcpu); > @@ -181,6 +220,7 @@ out: > > void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) > { > + kmem_cache_free(kvm_vcpu_cache, vcpu); > } > > void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > @@ -416,6 +456,15 @@ int kvm_arch_init(void *opaque) > if (err) > goto out_err; > > + /* > + * The upper 56 bits of VMIDs are used to identify the generation > + * counter, so VMIDs initialized to 0, having generation == 0, will > + * 
never be considered valid and therefore a new VMID must always be > + assigned. When the VMID generation rolls over, we start from > + VMID_FIRST_GENERATION again. > + */ > + next_vmid = VMID_FIRST_GENERATION; > + > return 0; > out_err: > return err; > diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c > index a320b56a..b256540 100644 > --- a/arch/arm/kvm/mmu.c > +++ b/arch/arm/kvm/mmu.c > @@ -159,6 +159,109 @@ out: > return err; > } > > +/** > + * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. > + * @kvm: The KVM struct pointer for the VM. > + * > + * Allocates the 1st level table only of size defined by PGD2_ORDER (can > + * support either full 40-bit input addresses or limited to 32-bit input > + * addresses). Clears the allocated pages. > + */ > +int kvm_alloc_stage2_pgd(struct kvm *kvm) > +{ > + pgd_t *pgd; > + > + if (kvm->arch.pgd != NULL) { > + kvm_err("kvm_arch already initialized?\n"); > + return -EINVAL; > + } > + > + pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER); > + if (!pgd) > + return -ENOMEM; > + > + memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t)); > + kvm->arch.pgd = pgd; > + > + return 0; > +} > + > +static void free_guest_pages(pte_t *pte, unsigned long addr) > +{ > + unsigned int i; > + struct page *page; > + > + for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { Hmm, "addr" is not used. 
> + if (!pte_present(*pte)) > + goto next_page; Why goto instead of: if(pte_present(*pte)) { > + page = pfn_to_page(pte_pfn(*pte)); > + put_page(page); } > +next_page: > + pte++; > + } > +} > + > +static void free_stage2_ptes(pmd_t *pmd, unsigned long addr) > +{ > + unsigned int i; > + pte_t *pte; > + struct page *page; > + > + for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { > + BUG_ON(pmd_sect(*pmd)); > + if (!pmd_none(*pmd) && pmd_table(*pmd)) { > + pte = pte_offset_kernel(pmd, addr); > + free_guest_pages(pte, addr); > + page = virt_to_page((void *)pte); > + WARN_ON(atomic_read(&page->_count) != 1); > + pte_free_kernel(NULL, pte); > + } > + pmd++; > + } > +} > + > +/** > + * kvm_free_stage2_pgd - free all stage-2 tables > + * @kvm: The KVM struct pointer for the VM. > + * > + * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all > + * underlying level-2 and level-3 tables before freeing the actual level-1 table > + * and setting the struct pointer to NULL. > + */ > +void kvm_free_stage2_pgd(struct kvm *kvm) > +{ > + pgd_t *pgd; > + pud_t *pud; > + pmd_t *pmd; > + unsigned long long i, addr; > + > + if (kvm->arch.pgd == NULL) > + return; > + > + /* > + * We do this slightly different than other places, since we need more > + * than 32 bits and for instance pgd_addr_end converts to unsigned long. 
> + */ > + addr = 0; > + for (i = 0; i < PTRS_PER_PGD2; i++) { > + addr = i * (unsigned long long)PGDIR_SIZE; > + pgd = kvm->arch.pgd + i; > + pud = pud_offset(pgd, addr); > + > + if (pud_none(*pud)) > + continue; > + > + BUG_ON(pud_bad(*pud)); > + > + pmd = pmd_offset(pud, addr); > + free_stage2_ptes(pmd, addr); > + pmd_free(NULL, pmd); > + } > + > + free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER); > + kvm->arch.pgd = NULL; > +} > + > int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) > { > return -EINVAL; > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html