Re: [PATCH v10 07/14] KVM: ARM: Memory virtualization setup

Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx> · Sun, 19 Aug 2012 00:34:24 -0400

On Thu, Aug 16, 2012 at 2:25 PM, Alexander Graf <agraf@xxxxxxx> wrote:
>
> On 16.08.2012, at 17:29, Christoffer Dall wrote:
>
>> This commit introduces the framework for guest memory management
>> through the use of 2nd stage translation. Each VM has a pointer
>> to a level-1 table (the pgd field in struct kvm_arch) which is
>> used for the 2nd stage translations. Entries are added when handling
>> guest faults (later patch) and the table itself can be allocated and
>> freed through the following functions implemented in
>> arch/arm/kvm/arm_mmu.c:
>> - kvm_alloc_stage2_pgd(struct kvm *kvm);
>> - kvm_free_stage2_pgd(struct kvm *kvm);
>>
>> Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
>> pgprot_guest variables used to map 2nd stage memory for KVM guests.
>>
>> Each entry in TLBs and caches are tagged with a VMID identifier in
>> addition to ASIDs. The VMIDs are assigned consecutively to VMs in the
>> order that VMs are executed, and caches and tlbs are invalidated when
>> the VMID space has been used to allow for more than 255 simultaenously
>> running guests.
>>
>> The 2nd stage pgd is allocated in kvm_arch_init_vm(). The table is
>> freed in kvm_arch_destroy_vm(). Both functions are called from the main
>> KVM code.
>>
>> We pre-allocate page table memory to be able to synchronize using a
>> spinlock and be called under rcu_read_lock from the MMU notifiers.  We
>> steal the mmu_memory_cache implementation from x86 and adapt for our
>> specific usage.
>>
>> We support MMU notifiers (thanks to Marc Zyngier) through
>> kvm_unmap_hva and kvm_set_spte_hva.
>>
>> Finally, define kvm_phys_addr_ioremap() to map a device at a guest IPA,
>> which is used by VGIC support to map the virtual CPU interface registers
>> to the guest. This support is added by Marc Zyngier.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
>> Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
>> ---
>> arch/arm/include/asm/kvm_asm.h        |    2
>> arch/arm/include/asm/kvm_host.h       |   18 ++
>> arch/arm/include/asm/kvm_mmu.h        |    9 +
>> arch/arm/include/asm/pgtable-3level.h |    9 +
>> arch/arm/include/asm/pgtable.h        |    4
>> arch/arm/kvm/Kconfig                  |    1
>> arch/arm/kvm/arm.c                    |   38 +++
>> arch/arm/kvm/exports.c                |    1
>> arch/arm/kvm/interrupts.S             |    8 +
>> arch/arm/kvm/mmu.c                    |  373 +++++++++++++++++++++++++++++++++
>> arch/arm/mm/mmu.c                     |    3
>> 11 files changed, 465 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
>> index 58d51e3..55b6446 100644
>> --- a/arch/arm/include/asm/kvm_asm.h
>> +++ b/arch/arm/include/asm/kvm_asm.h
>> @@ -34,6 +34,7 @@
>> #define SMCHYP_HVBAR_W 0xfffffff0
>>
>> #ifndef __ASSEMBLY__
>> +struct kvm;
>> struct kvm_vcpu;
>>
>> extern char __kvm_hyp_init[];
>> @@ -48,6 +49,7 @@ extern char __kvm_hyp_code_start[];
>> extern char __kvm_hyp_code_end[];
>>
>> extern void __kvm_flush_vm_context(void);
>> +extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
>>
>> extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
>> #endif
>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>> index d7e3398..d86ce39 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -157,4 +157,22 @@ struct kvm_vcpu_stat {
>> struct kvm_vcpu_init;
>> int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
>>                       const struct kvm_vcpu_init *init);
>> +
>> +#define KVM_ARCH_WANT_MMU_NOTIFIER
>> +struct kvm;
>> +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
>> +int kvm_unmap_hva_range(struct kvm *kvm,
>> +                     unsigned long start, unsigned long end);
>> +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
>> +
>> +/* We do not have shadow page tables, hence the empty hooks */
>> +static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
>> +{
>> +     return 0;
>> +}
>> #endif /* __ARM_KVM_HOST_H__ */
>> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
>> index 8252921..11f4c3a 100644
>> --- a/arch/arm/include/asm/kvm_mmu.h
>> +++ b/arch/arm/include/asm/kvm_mmu.h
>> @@ -33,4 +33,13 @@ int create_hyp_mappings(void *from, void *to);
>> int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
>> void free_hyp_pmds(void);
>>
>> +int kvm_alloc_stage2_pgd(struct kvm *kvm);
>> +void kvm_free_stage2_pgd(struct kvm *kvm);
>> +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
>> +                       phys_addr_t pa, unsigned long size);
>> +
>> +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
>> +
>> +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
>> +
>> #endif /* __ARM_KVM_MMU_H__ */
>> diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
>> index 1169a8a..7351eee 100644
>> --- a/arch/arm/include/asm/pgtable-3level.h
>> +++ b/arch/arm/include/asm/pgtable-3level.h
>> @@ -102,6 +102,15 @@
>>  */
>> #define L_PGD_SWAPPER         (_AT(pgdval_t, 1) << 55)        /* swapper_pg_dir entry */
>>
>> +/*
>> + * 2-nd stage PTE definitions for LPAE.
>> + */
>> +#define L_PTE2_SHARED                L_PTE_SHARED
>> +#define L_PTE2_READ          (_AT(pteval_t, 1) << 6) /* HAP[0] */
>> +#define L_PTE2_WRITE         (_AT(pteval_t, 1) << 7) /* HAP[1] */
>> +#define L_PTE2_NORM_WB               (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
>> +#define L_PTE2_INNER_WB              (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
>> +
>> #ifndef __ASSEMBLY__
>>
>> #define pud_none(pud)         (!pud_val(pud))
>> diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
>> index bc83540..a31d0e9 100644
>> --- a/arch/arm/include/asm/pgtable.h
>> +++ b/arch/arm/include/asm/pgtable.h
>> @@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
>>
>> extern pgprot_t               pgprot_user;
>> extern pgprot_t               pgprot_kernel;
>> +extern pgprot_t              pgprot_guest;
>>
>> #define _MOD_PROT(p, b)       __pgprot(pgprot_val(p) | (b))
>>
>> @@ -83,6 +84,9 @@ extern pgprot_t             pgprot_kernel;
>> #define PAGE_KERNEL           _MOD_PROT(pgprot_kernel, L_PTE_XN)
>> #define PAGE_KERNEL_EXEC      pgprot_kernel
>> #define PAGE_HYP              _MOD_PROT(pgprot_kernel, L_PTE_USER)
>> +#define PAGE_KVM_GUEST               _MOD_PROT(pgprot_guest, L_PTE2_READ | \
>> +                                       L_PTE2_NORM_WB | L_PTE2_INNER_WB | \
>> +                                       L_PTE2_SHARED)
>>
>> #define __PAGE_NONE           __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
>> #define __PAGE_SHARED         __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
>> diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
>> index 83abbe0..7fa50d3 100644
>> --- a/arch/arm/kvm/Kconfig
>> +++ b/arch/arm/kvm/Kconfig
>> @@ -36,6 +36,7 @@ config KVM_ARM_HOST
>>       depends on KVM
>>       depends on MMU
>>       depends on CPU_V7 && ARM_VIRT_EXT
>> +     select  MMU_NOTIFIER
>>       ---help---
>>         Provides host support for ARM processors.
>>
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index 0b1c466..3f97e7c 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -82,12 +82,34 @@ void kvm_arch_sync_events(struct kvm *kvm)
>> {
>> }
>>
>> +/**
>> + * kvm_arch_init_vm - initializes a VM data structure
>> + * @kvm:     pointer to the KVM struct
>> + */
>> int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>> {
>> +     int ret = 0;
>> +
>>       if (type)
>>               return -EINVAL;
>>
>> -     return 0;
>> +     ret = kvm_alloc_stage2_pgd(kvm);
>> +     if (ret)
>> +             goto out_fail_alloc;
>> +     spin_lock_init(&kvm->arch.pgd_lock);
>> +
>> +     ret = create_hyp_mappings(kvm, kvm + 1);
>> +     if (ret)
>> +             goto out_free_stage2_pgd;
>> +
>> +     /* Mark the initial VMID generation invalid */
>> +     kvm->arch.vmid_gen = 0;
>> +
>> +     return ret;
>> +out_free_stage2_pgd:
>> +     kvm_free_stage2_pgd(kvm);
>> +out_fail_alloc:
>> +     return ret;
>> }
>>
>> int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
>> @@ -105,10 +127,16 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
>>       return 0;
>> }
>>
>> +/**
>> + * kvm_arch_destroy_vm - destroy the VM data structure
>> + * @kvm:     pointer to the KVM struct
>> + */
>> void kvm_arch_destroy_vm(struct kvm *kvm)
>> {
>>       int i;
>>
>> +     kvm_free_stage2_pgd(kvm);
>> +
>>       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>               if (kvm->vcpus[i]) {
>>                       kvm_arch_vcpu_free(kvm->vcpus[i]);
>> @@ -184,7 +212,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
>>       if (err)
>>               goto free_vcpu;
>>
>> +     err = create_hyp_mappings(vcpu, vcpu + 1);
>> +     if (err)
>> +             goto vcpu_uninit;
>> +
>>       return vcpu;
>> +vcpu_uninit:
>> +     kvm_vcpu_uninit(vcpu);
>> free_vcpu:
>>       kmem_cache_free(kvm_vcpu_cache, vcpu);
>> out:
>> @@ -193,6 +227,8 @@ out:
>>
>> void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
>> {
>> +     kvm_mmu_free_memory_caches(vcpu);
>> +     kmem_cache_free(kvm_vcpu_cache, vcpu);
>> }
>>
>> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>> diff --git a/arch/arm/kvm/exports.c b/arch/arm/kvm/exports.c
>> index 8ebdf07..f39f823 100644
>> --- a/arch/arm/kvm/exports.c
>> +++ b/arch/arm/kvm/exports.c
>> @@ -33,5 +33,6 @@ EXPORT_SYMBOL_GPL(__kvm_hyp_code_end);
>> EXPORT_SYMBOL_GPL(__kvm_vcpu_run);
>>
>> EXPORT_SYMBOL_GPL(__kvm_flush_vm_context);
>> +EXPORT_SYMBOL_GPL(__kvm_tlb_flush_vmid);
>>
>> EXPORT_SYMBOL_GPL(smp_send_reschedule);
>> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
>> index bf09801..edf9ed5 100644
>> --- a/arch/arm/kvm/interrupts.S
>> +++ b/arch/arm/kvm/interrupts.S
>> @@ -31,6 +31,14 @@ __kvm_hyp_code_start:
>>       .globl __kvm_hyp_code_start
>>
>> @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
>> +@  Flush per-VMID TLBs
>> +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
>> +
>> +ENTRY(__kvm_tlb_flush_vmid)
>> +     bx      lr
>> +ENDPROC(__kvm_tlb_flush_vmid)
>> +
>> +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
>> @  Flush TLBs and instruction caches of current CPU for all VMIDs
>> @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
>>
>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>> index 6a7dfd4..6cb0e38 100644
>> --- a/arch/arm/kvm/mmu.c
>> +++ b/arch/arm/kvm/mmu.c
>> @@ -23,10 +23,43 @@
>> #include <asm/pgalloc.h>
>> #include <asm/kvm_arm.h>
>> #include <asm/kvm_mmu.h>
>> +#include <asm/kvm_asm.h>
>> #include <asm/mach/map.h>
>>
>> static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
>>
>> +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
>> +                               int min, int max)
>> +{
>> +     void *page;
>> +
>> +     BUG_ON(max > KVM_NR_MEM_OBJS);
>> +     if (cache->nobjs >= min)
>> +             return 0;
>> +     while (cache->nobjs < max) {
>> +             page = (void *)__get_free_page(PGALLOC_GFP);
>> +             if (!page)
>> +                     return -ENOMEM;
>> +             cache->objects[cache->nobjs++] = page;
>> +     }
>> +     return 0;
>> +}
>> +
>> +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
>> +{
>> +     while (mc->nobjs)
>> +             free_page((unsigned long)mc->objects[--mc->nobjs]);
>> +}
>> +
>> +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
>> +{
>> +     void *p;
>> +
>> +     BUG_ON(!mc || !mc->nobjs);
>> +     p = mc->objects[--mc->nobjs];
>> +     return p;
>> +}
>> +
>> static void free_ptes(pmd_t *pmd, unsigned long addr)
>> {
>>       pte_t *pte;
>> @@ -200,7 +233,347 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
>>       return __create_hyp_mappings(from, to, &pfn);
>> }
>>
>> +/**
>> + * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
>> + * @kvm:     The KVM struct pointer for the VM.
>> + *
>> + * Allocates the 1st level table only of size defined by PGD2_ORDER (can
>> + * support either full 40-bit input addresses or limited to 32-bit input
>> + * addresses). Clears the allocated pages.
>> + *
>> + * Note we don't need locking here as this is only called when the VM is
>> + * created, which can only be done once.
>> + */
>> +int kvm_alloc_stage2_pgd(struct kvm *kvm)
>> +{
>> +     pgd_t *pgd;
>> +
>> +     if (kvm->arch.pgd != NULL) {
>> +             kvm_err("kvm_arch already initialized?\n");
>> +             return -EINVAL;
>> +     }
>> +
>> +     pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);
>> +     if (!pgd)
>> +             return -ENOMEM;
>> +
>> +     memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
>> +     kvm->arch.pgd = pgd;
>> +
>> +     return 0;
>> +}
>> +
>> +static void free_guest_pages(pte_t *pte, unsigned long addr)
>> +{
>> +     unsigned int i;
>> +     struct page *page, *pte_page;
>> +
>> +     pte_page = virt_to_page(pte);
>> +
>> +     for (i = 0; i < PTRS_PER_PTE; i++) {
>> +             if (pte_present(*pte)) {
>> +                     unsigned long pfn = pte_pfn(*pte);
>> +
>> +                     if (pfn_valid(pfn)) { /* Skip over device memory */
>> +                             page = pfn_to_page(pfn);
>> +                             put_page(page);
>> +                     }
>> +                     put_page(pte_page);
>> +             }
>> +             pte++;
>> +     }
>> +}
>> +
>> +static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
>> +{
>> +     unsigned int i;
>> +     pte_t *pte;
>> +     struct page *page, *pmd_page;
>> +
>> +     pmd_page = virt_to_page(pmd);
>> +
>> +     for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
>> +             BUG_ON(pmd_sect(*pmd));
>> +             if (!pmd_none(*pmd) && pmd_table(*pmd)) {
>> +                     pte = pte_offset_kernel(pmd, addr);
>> +                     free_guest_pages(pte, addr);
>> +                     page = virt_to_page((void *)pte);
>> +                     WARN_ON(page_count(page) != 1);
>> +                     pte_free_kernel(NULL, pte);
>> +
>> +                     put_page(pmd_page);
>> +             }
>> +             pmd++;
>> +     }
>> +}
>> +
>> +/**
>> + * kvm_free_stage2_pgd - free all stage-2 tables
>> + * @kvm:     The KVM struct pointer for the VM.
>> + *
>> + * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
>> + * underlying level-2 and level-3 tables before freeing the actual level-1 table
>> + * and setting the struct pointer to NULL.
>> + *
>> + * Note we don't need locking here as this is only called when the VM is
>> + * destroyed, which can only be done once.
>> + */
>> +void kvm_free_stage2_pgd(struct kvm *kvm)
>> +{
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     unsigned long long i, addr;
>> +     struct page *page, *pud_page;
>> +
>> +     if (kvm->arch.pgd == NULL)
>> +             return;
>> +
>> +     /*
>> +      * We do this slightly different than other places, since we need more
>> +      * than 32 bits and for instance pgd_addr_end converts to unsigned long.
>> +      */
>> +     addr = 0;
>> +     for (i = 0; i < PTRS_PER_PGD2; i++) {
>> +             addr = i * (unsigned long long)PGDIR_SIZE;
>> +             pgd = kvm->arch.pgd + i;
>> +             pud = pud_offset(pgd, addr);
>> +             pud_page = virt_to_page(pud);
>> +
>> +             if (pud_none(*pud))
>> +                     continue;
>> +
>> +             BUG_ON(pud_bad(*pud));
>> +
>> +             pmd = pmd_offset(pud, addr);
>> +             free_stage2_ptes(pmd, addr);
>> +             page = virt_to_page((void *)pmd);
>> +             WARN_ON(page_count(page) != 1);
>> +             pmd_free(NULL, pmd);
>> +             put_page(pud_page);
>> +     }
>> +
>> +     WARN_ON(page_count(pud_page) != 1);
>> +     free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER);
>> +     kvm->arch.pgd = NULL;
>> +}
>> +
>> +/*
>> + * Clear a stage-2 PTE, lowering the various ref-counts. Also takes
>> + * care of invalidating the TLBs.  Must be called while holding
>> + * pgd_lock, otherwise another faulting VCPU may come in and mess
>> + * things behind our back.
>> + */
>> +static void stage2_clear_pte(struct kvm *kvm, phys_addr_t addr)
>> +{
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     pte_t *pte;
>> +     struct page *page;
>> +
>> +     kvm_debug("Clearing PTE&%08llx\n", addr);
>> +     pgd = kvm->arch.pgd + pgd_index(addr);
>> +     pud = pud_offset(pgd, addr);
>> +     BUG_ON(pud_none(*pud));
>> +
>> +     pmd = pmd_offset(pud, addr);
>> +     BUG_ON(pmd_none(*pmd));
>> +
>> +     pte = pte_offset_kernel(pmd, addr);
>> +     set_pte_ext(pte, __pte(0), 0);
>> +
>> +     page = virt_to_page(pte);
>> +     put_page(page);
>> +     if (page_count(page) != 1) {
>> +             __kvm_tlb_flush_vmid(kvm);
>> +             return;
>> +     }
>> +
>> +     /* Need to remove pte page */
>> +     pmd_clear(pmd);
>> +     __kvm_tlb_flush_vmid(kvm);
>> +     pte_free_kernel(NULL, (pte_t *)((unsigned long)pte & PAGE_MASK));
>> +
>> +     page = virt_to_page(pmd);
>> +     put_page(page);
>> +     if (page_count(page) != 1)
>> +             return;
>> +
>> +     /*
>> +      * Need to remove pmd page. This is the worst case, and we end
>> +      * up invalidating the TLB twice. No big deal.
>> +      */
>> +     pud_clear(pud);
>> +     __kvm_tlb_flush_vmid(kvm);
>> +     pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
>> +
>> +     page = virt_to_page(pud);
>> +     put_page(page);
>> +}
>> +
>> +static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
>> +                        phys_addr_t addr, const pte_t *new_pte)
>> +{
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     pte_t *pte;
>> +
>> +     /* Create 2nd stage page table mapping - Level 1 */
>> +     pgd = kvm->arch.pgd + pgd_index(addr);
>> +     pud = pud_offset(pgd, addr);
>> +     if (pud_none(*pud)) {
>> +             if (!cache)
>> +                     return; /* ignore calls from kvm_set_spte_hva */
>> +             pmd = mmu_memory_cache_alloc(cache);
>> +             pud_populate(NULL, pud, pmd);
>> +             pmd += pmd_index(addr);
>> +             get_page(virt_to_page(pud));
>> +     } else
>> +             pmd = pmd_offset(pud, addr);
>> +
>> +     /* Create 2nd stage page table mapping - Level 2 */
>> +     if (pmd_none(*pmd)) {
>> +             if (!cache)
>> +                     return; /* ignore calls from kvm_set_spte_hva */
>> +             pte = mmu_memory_cache_alloc(cache);
>> +             clean_pte_table(pte);
>> +             pmd_populate_kernel(NULL, pmd, pte);
>> +             pte += pte_index(addr);
>> +             get_page(virt_to_page(pmd));
>> +     } else
>> +             pte = pte_offset_kernel(pmd, addr);
>> +
>> +     /* Create 2nd stage page table mapping - Level 3 */
>> +     BUG_ON(pte_none(pte));
>> +     set_pte_ext(pte, *new_pte, 0);
>> +     get_page(virt_to_page(pte));
>> +}
>> +
>> +/**
>> + * kvm_phys_addr_ioremap - map a device range to guest IPA
>> + *
>> + * @kvm:     The KVM pointer
>> + * @guest_ipa:       The IPA at which to insert the mapping
>> + * @pa:              The physical address of the device
>> + * @size:    The size of the mapping
>> + */
>> +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
>> +                       phys_addr_t pa, unsigned long size)
>> +{
>> +     phys_addr_t addr, end;
>> +     pgprot_t prot;
>> +     int ret = 0;
>> +     unsigned long pfn;
>> +     struct kvm_mmu_memory_cache cache = { 0, };
>> +
>> +     end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
>> +     prot = __pgprot(get_mem_type_prot_pte(MT_DEVICE) | L_PTE_USER |
>> +                     L_PTE2_READ | L_PTE2_WRITE);
>> +     pfn = __phys_to_pfn(pa);
>> +
>> +     for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
>> +             pte_t pte = pfn_pte(pfn, prot);
>> +
>> +             ret = mmu_topup_memory_cache(&cache, 2, 2);
>> +             if (ret)
>> +                     goto out;
>> +             spin_lock(&kvm->arch.pgd_lock);
>> +             stage2_set_pte(kvm, &cache, addr, &pte);
>> +             spin_unlock(&kvm->arch.pgd_lock);
>> +
>> +             pfn++;
>> +     }
>> +
>> +out:
>> +     mmu_free_memory_cache(&cache);
>> +     return ret;
>> +}
>> +
>> int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> {
>>       return -EINVAL;
>> }
>> +
>> +static bool hva_to_gpa(struct kvm *kvm, unsigned long hva, gpa_t *gpa)
>
> A single hva can have multiple gpas mapped, no? At least that's what I gathered from the discussion about my attempt to a function similar to this :).
>

I don't think this is the case for ARM, can you provide an example? We
use gfn_to_pfn_prot and only allow user memory regions. What you
suggest would be multiple physical addresses pointing to the same
memory bank, I don't think that makes any sense on ARM hardware, for
x86 and PPC I don't know.

Unless I miss something completely all together here...?

> I'm also having a hard time following your mmu code in general. When do pages get mapped? Where do they get mapped from?
>

as the text describes, this happens in a later patch. See
user_mem_abort in patch 12.

>
> Alex
>
_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm