From: Paul Mackerras <paulus@xxxxxxxxx> This relaxes the requirement that the guest memory be provided as 16MB huge pages, allowing it to be provided as normal memory, i.e. in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index the kvm->arch.slot_phys[] arrays with a small page index, even if huge pages are being used, and use the low-order 5 bits of each entry to store the order of the enclosing page with respect to normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE). Signed-off-by: Paul Mackerras <paulus@xxxxxxxxx> Signed-off-by: Alexander Graf <agraf@xxxxxxx> --- arch/powerpc/include/asm/kvm_book3s_64.h | 10 +++ arch/powerpc/include/asm/kvm_host.h | 3 +- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 122 ++++++++++++++++++++---------- arch/powerpc/kvm/book3s_hv.c | 57 ++++++++------ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +- 7 files changed, 132 insertions(+), 69 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 7e6f2ed..10920f7 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -113,4 +113,14 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) return 0; /* error */ } +static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, + unsigned long pagesize) +{ + unsigned long mask = (pagesize >> PAGE_SHIFT) - 1; + + if (pagesize <= PAGE_SIZE) + return 1; + return !(memslot->base_gfn & mask) && !(memslot->npages & mask); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index beb22ba..9252d5e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -177,14 +177,13 @@ struct revmap_entry { }; /* Low-order bits in kvm->arch.slot_phys[][] */ +#define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned long ram_psize; - unsigned long ram_porder; unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1458c67..fb70414 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7fdc2c0..64447f6 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -237,6 +237,7 @@ #define LPCR_ISL (1ul << (63-2)) #define LPCR_VC_SH (63-2) #define LPCR_DPFD_SH (63-11) +#define LPCR_VRMASD (0x1ful << (63-16)) #define LPCR_VRMA_L (1ul << (63-12)) #define LPCR_VRMA_LP0 (1ul << (63-15)) #define LPCR_VRMA_LP1 (1ul << (63-16)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 87016cc..cc18f3d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -34,8 +34,6 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -/* Pages in the VRMA are 16MB pages */ -#define VRMA_PAGE_ORDER 24 #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ @@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm) free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); } -void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) +/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; +} + +/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize == 0x10000) ? 0x1000 : 0; +} + +void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, + unsigned long porder) { - struct kvm *kvm = vcpu->kvm; unsigned long i; unsigned long npages; unsigned long hp_v, hp_r; unsigned long addr, hash; - unsigned long porder = kvm->arch.ram_porder; + unsigned long psize; + unsigned long hp0, hp1; long ret; - npages = kvm->arch.slot_npages[memslot->id]; + psize = 1ul << porder; + npages = memslot->npages >> (porder - PAGE_SHIFT); /* VRMA can't be > 1TB */ if (npages > 1ul << (40 - porder)) @@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) if (npages > HPT_NPTEG) npages = HPT_NPTEG; + hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | + HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); + hp1 = hpte1_pgsize_encoding(psize) | + HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + for (i = 0; i < npages; ++i) { addr = i << porder; /* can't use hpt_hash since va > 64 bits */ @@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) * is available and use it. */ hash = (hash << 3) + 7; - hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | - (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | - HPTE_V_LARGE | HPTE_V_VALID; - hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + hp_v = hp0 | ((addr >> 16) & ~0x7fUL); + hp_r = hp1 | addr; ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); if (ret != H_SUCCESS) { pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", @@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) * one already in the kvm->arch.slot_phys[][] arrays. */ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot, + unsigned long psize) { unsigned long start; - long np; - struct page *page, *pages[1]; + long np, err; + struct page *page, *hpage, *pages[1]; + unsigned long s, pgsize; unsigned long *physp; - unsigned long pfn, i; + unsigned int got, pgorder; + unsigned long pfn, i, npages; physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return -EINVAL; - i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); - if (physp[i]) + if (physp[gfn - memslot->base_gfn]) return 0; page = NULL; + pgsize = psize; start = gfn_to_hva_memslot(memslot, gfn); /* Instantiate and get the page we want access to */ @@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, if (np != 1) return -EINVAL; page = pages[0]; - - /* Check it's a 16MB page */ - if (!PageHead(page) || - compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) { - pr_err("page at %lx isn't 16MB (o=%d)\n", - start, compound_order(page)); - put_page(page); - return -EINVAL; + got = KVMPPC_GOT_PAGE; + + /* See if this is a large page */ + s = PAGE_SIZE; + if (PageHuge(page)) { + hpage = compound_head(page); + s <<= compound_order(hpage); + /* Get the whole large page if slot alignment is ok */ + if (s > psize && slot_is_aligned(memslot, s) && + !(memslot->userspace_addr & (s - 1))) { + start &= ~(s - 1); + pgsize = s; + page = hpage; + } } + err = -EINVAL; + if (s < psize) + goto out; pfn = page_to_pfn(page); + npages = pgsize >> PAGE_SHIFT; + pgorder = __ilog2(npages); + physp += (gfn - memslot->base_gfn) & ~(npages - 1); spin_lock(&kvm->arch.slot_phys_lock); - if (!physp[i]) - physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE; - else - put_page(page); + for (i = 0; i < npages; ++i) { + if (!physp[i]) { + physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder; + got = 0; + } + } spin_unlock(&kvm->arch.slot_phys_lock); + err = 0; - return 0; + out: + if (got) { + if (PageHuge(page)) + page = compound_head(page); + put_page(page); + } + return err; } /* @@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; + if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) return H_PARAMETER; preempt_disable(); @@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct kvm_memory_slot *memslot; unsigned long gfn = gpa >> PAGE_SHIFT; struct page *page; - unsigned long offset; - unsigned long pfn, pa; + unsigned long psize, offset; + unsigned long pa; unsigned long *physp; memslot = gfn_to_memslot(kvm, gfn); @@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return NULL; - physp += (gfn - memslot->base_gfn) >> - (kvm->arch.ram_porder - PAGE_SHIFT); + physp += gfn - memslot->base_gfn; pa = *physp; if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) return NULL; pa = *physp; } - pfn = pa >> PAGE_SHIFT; - page = pfn_to_page(pfn); + page = pfn_to_page(pa >> PAGE_SHIFT); + psize = PAGE_SIZE; + if (PageHuge(page)) { + page = compound_head(page); + psize <<= compound_order(page); + } get_page(page); - offset = gpa & (kvm->arch.ram_psize - 1); + offset = gpa & (psize - 1); if (nb_ret) - *nb_ret = kvm->arch.ram_psize - offset; + *nb_ret = psize - offset; return page_address(page) + offset; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ce5a13f..6ed0a84 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -51,8 +51,6 @@ #include <linux/highmem.h> #include <linux/hugetlb.h> -#define LARGE_PAGE_ORDER 24 /* 16MB pages */ - /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -1074,24 +1072,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } +static unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - unsigned long psize; unsigned long npages; unsigned long *phys; - /* For now, only allow 16MB-aligned slots */ - psize = kvm->arch.ram_psize; - if ((mem->memory_size & (psize - 1)) || - (mem->guest_phys_addr & (psize - 1))) { - pr_err("bad memory_size=%llx @ %llx\n", - mem->memory_size, mem->guest_phys_addr); - return -EINVAL; - } - /* Allocate a slot_phys array */ - npages = mem->memory_size >> kvm->arch.ram_porder; + npages = mem->memory_size >> PAGE_SHIFT; phys = kvm->arch.slot_phys[mem->slot]; if (!phys) { phys = vzalloc(npages * sizeof(unsigned long)); @@ -1119,6 +1119,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id) continue; pfn = physp[j] >> PAGE_SHIFT; page = pfn_to_page(pfn); + if (PageHuge(page)) + page = compound_head(page); SetPageDirty(page); put_page(page); } @@ -1141,12 +1143,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; - unsigned long lpcr; + unsigned long lpcr, senc; unsigned long psize, porder; unsigned long rma_size; unsigned long rmls; unsigned long *physp; - unsigned long i, npages, pa; + unsigned long i, npages; mutex_lock(&kvm->lock); if (kvm->arch.rma_setup_done) @@ -1168,8 +1170,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto up_out; psize = vma_kernel_pagesize(vma); - if (psize != kvm->arch.ram_psize) - goto up_out; + porder = __ilog2(psize); /* Is this one of our preallocated RMAs? */ if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && @@ -1186,13 +1187,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto out; } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out; + /* Update VRMASD field in the LPCR */ - lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH); - lpcr |= LPCR_VRMA_L; + senc = slb_pgsize_encoding(psize); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); kvm->arch.lpcr = lpcr; /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot); + kvmppc_map_vrma(vcpu, memslot, porder); } else { /* Set up to use an RMO region */ @@ -1231,13 +1239,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); /* Initialize phys addrs of pages in RMO */ - porder = kvm->arch.ram_porder; - npages = rma_size >> porder; - pa = ri->base_pfn << PAGE_SHIFT; + npages = ri->npages; + porder = __ilog2(npages); physp = kvm->arch.slot_phys[memslot->id]; spin_lock(&kvm->arch.slot_phys_lock); for (i = 0; i < npages; ++i) - physp[i] = pa + (i << porder); + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; spin_unlock(&kvm->arch.slot_phys_lock); } @@ -1266,8 +1273,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); - kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER; - kvm->arch.ram_porder = LARGE_PAGE_ORDER; kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 047c5e1..c086eb0 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -77,6 +77,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = builtin_gfn_to_memslot(kvm, gfn); if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) return H_PARAMETER; + + /* Check if the requested page fits entirely in the memslot. */ + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; slot_fn = gfn - memslot->base_gfn; physp = kvm->arch.slot_phys[memslot->id]; @@ -88,9 +92,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, pa = *physp; if (!pa) return H_TOO_HARD; + pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); pa &= PAGE_MASK; - pte_size = kvm->arch.ram_psize; if (pte_size < psize) return H_PARAMETER; if (pa && pte_size > psize) -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html