On Tue, 2019-03-19 at 17:53 +1100, Alexey Kardashevskiy wrote:
> 
> On 19/03/2019 15:04, Suraj Jitindar Singh wrote:
> > Implement a real mode handler for the H_CALL H_PAGE_INIT which can be
> > used to zero or copy a guest page. The page is defined to be 4k and
> > must be 4k aligned.
> > 
> > The in-kernel real mode handler halves the time to handle this H_CALL
> > compared to handling it in userspace for a hash guest.
> > 
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx>
> > ---
> >  arch/powerpc/include/asm/kvm_ppc.h      |   2 +
> >  arch/powerpc/kvm/book3s_hv_rm_mmu.c     | 144 ++++++++++++++++++++++++++++++++
> >  arch/powerpc/kvm/book3s_hv_rmhandlers.S |   2 +-
> >  3 files changed, 147 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> > index 8e8bb1299a0e..f34f290463aa 100644
> > --- a/arch/powerpc/include/asm/kvm_ppc.h
> > +++ b/arch/powerpc/include/asm/kvm_ppc.h
> > @@ -653,6 +653,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
> >  			unsigned long pte_index);
> >  long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
> >  			unsigned long pte_index);
> > +long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
> > +			unsigned long dest, unsigned long src);
> >  long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
> >  			unsigned long slb_v, unsigned int status, bool data);
> >  unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
> > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > index 3b3791ed74a6..26cfe1480ff5 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > @@ -867,6 +867,150 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
> >  	return ret;
> >  }
> >  
> > +static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa,
> > +			  int writing, unsigned long *hpa,
> > +			  struct kvm_memory_slot **memslot_p)
> > +{
> > +	struct kvm *kvm = vcpu->kvm;
> > +	struct kvm_memory_slot *memslot;
> > +	unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
> > +	unsigned int shift;
> > +	pte_t *ptep, pte;
> > +
> > +	/* Find the memslot for this address */
> > +	gfn = gpa >> PAGE_SHIFT;
> > +	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
> > +	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
> > +		return H_PARAMETER;
> > +
> > +	/* Translate to host virtual address */
> > +	hva = __gfn_to_hva_memslot(memslot, gfn);
> > +
> > +	/* Try to find the host pte for that virtual address */
> > +	ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
> > +	if (!ptep)
> > +		return H_TOO_HARD;
> > +	pte = kvmppc_read_update_linux_pte(ptep, writing);
> > +	if (!pte_present(pte))
> > +		return H_TOO_HARD;
> > +
> > +	/* Convert to a physical address */
> > +	if (shift)
> > +		psize = 1UL << shift;
> > +	pa = pte_pfn(pte) << PAGE_SHIFT;
> > +	pa |= hva & (psize - 1);
> > +	pa |= gpa & ~PAGE_MASK;
> > +
> > +	if (hpa)
> > +		*hpa = pa;
> 
> hpa is always not null.

For now that is the case. I feel like it's better to check in case
someone reuses the function in future.

> 
> > +	if (memslot_p)
> > +		*memslot_p = memslot;
> 
> memslot_p!=NULL only when writing==1, you can safely drop the writing
> parameter.

As above.

> 
> > +
> > +	return H_SUCCESS;
> > +}
> > +
> > +static int kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, unsigned long dest)
> > +{
> > +	struct kvm *kvm = vcpu->kvm;
> > +	struct kvm_memory_slot *memslot;
> > +	unsigned long pa;
> > +	unsigned long mmu_seq;
> 
> Nit: the two lines above can be one.

True :)

> 
> > +	int i, ret = H_SUCCESS;
> > +
> > +	/* Used later to detect if we might have been invalidated */
> > +	mmu_seq = kvm->mmu_notifier_seq;
> > +	smp_rmb();
> > +
> > +	ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot);
> > +	if (ret != H_SUCCESS)
> > +		return ret;
> > +
> > +	/* Check if we've been invalidated */
> > +	spin_lock(&kvm->mmu_lock);
> > +	if (mmu_notifier_retry(kvm, mmu_seq)) {
> > +		ret = H_TOO_HARD;
> > +		goto out_unlock;
> > +	}
> > +
> > +	/* Zero the page */
> > +	for (i = 0; i < 4096; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
> > +		dcbz((void *)pa);
> > +	kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
> > +
> > +out_unlock:
> > +	spin_unlock(&kvm->mmu_lock);
> > +	return ret;
> > +}
> > +
> > +static int kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, unsigned long dest,
> > +				      unsigned long src)
> > +{
> > +	struct kvm *kvm = vcpu->kvm;
> > +	struct kvm_memory_slot *dest_memslot;
> > +	unsigned long dest_pa, src_pa;
> > +	unsigned long mmu_seq;
> > +	int ret = H_SUCCESS;
> > +
> > +	/* Used later to detect if we might have been invalidated */
> > +	mmu_seq = kvm->mmu_notifier_seq;
> > +	smp_rmb();
> > +
> > +	ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot);
> > +	if (ret != H_SUCCESS)
> > +		return ret;
> > +	ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL);
> > +	if (ret != H_SUCCESS)
> > +		return ret;
> > +
> > +	/* Check if we've been invalidated */
> > +	spin_lock(&kvm->mmu_lock);
> 
> I am no expert on spin_lock but from my memory in real mode it should
> be at least raw_spin_lock as CONFIG_DEBUG_SPINLOCK (when defined) can
> break things horribly.

I am also no expert. I'll take your word for it.

> 
> > +	if (mmu_notifier_retry(kvm, mmu_seq)) {
> > +		ret = H_TOO_HARD;
> > +		goto out_unlock;
> > +	}
> > +
> > +	/* Copy the page */
> > +	memcpy((void *)dest_pa, (void *)src_pa, 4096);
> 
> s/4096/SZ_4K/

yep

> 
> > +
> > +	kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
> > +
> > +out_unlock:
> > +	spin_unlock(&kvm->mmu_lock);
> > +	return ret;
> > +}
> > +
> > +long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
> > +			   unsigned long dest, unsigned long src)
> > +{
> > +	struct kvm *kvm = vcpu->kvm;
> > +	u64 pg_sz = 1UL << 12;	/* 4K page size */
> > +	u64 pg_mask = pg_sz - 1;
> 
> Same comment about SZ_4K as in 2/3.

yep

> 
> > +	int ret = H_SUCCESS;
> 
> Usually H_SUCCESS/... codes are long and EINVAL/... are int. Same for
> 2/3. The rule is not used 100% of the time but nevertheless.

Ok, will do

> 
> > +
> > +	/* Don't handle radix mode here, go up to the virtual mode handler */
> > +	if (kvm_is_radix(kvm))
> > +		return H_TOO_HARD;
> > +
> > +	/* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
> > +	if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
> > +		      H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
> > +		return H_PARAMETER;
> > +
> > +	/* dest (and src if copy_page flag set) must be page aligned */
> > +	if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
> > +		return H_PARAMETER;
> > +
> > +	/* zero and/or copy the page as determined by the flags */
> > +	if (flags & H_ZERO_PAGE)
> > +		ret = kvmppc_do_h_page_init_zero(vcpu, dest);
> 
> "else" here?

yeah and I'll flip the order of zero and copy

> 
> > +	if (flags & H_COPY_PAGE)
> > +		ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
> 
> 	else if (src)
> 		return H_PARAMETER;
> 
> > +
> > +	/* We can ignore the other flags */
> > +
> > +	return ret;
> > +}
> > +
> >  void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
> >  			unsigned long pte_index)
> >  {
> > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > index 9b8d50a7cbaf..5927497e7bbf 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > @@ -2268,7 +2268,7 @@ hcall_real_table:
> >  	.long	DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table
> >  	.long	0		/* 0x24 - H_SET_SPRG0 */
> >  	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
> > -	.long	0		/* 0x2c */
> > +	.long	DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table
> >  	.long	0		/* 0x30 */
> >  	.long	0		/* 0x34 */
> >  	.long	0		/* 0x38 */
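
For reference, the reworked dispatch agreed on above (an "else" between
the two handlers with the copy/zero order flipped, SZ_4K instead of the
open-coded 4K constants, and a long return code) might look roughly like
the sketch below. Letting copy take precedence when both flags are set,
and keeping the helper names and signatures unchanged, are assumptions
of this sketch rather than something spelled out in the thread:

long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
			   unsigned long dest, unsigned long src)
{
	struct kvm *kvm = vcpu->kvm;
	u64 pg_sz = SZ_4K;	/* 4K page size, SZ_4K from <linux/sizes.h> */
	u64 pg_mask = pg_sz - 1;
	long ret = H_SUCCESS;	/* long, to match the H_* convention */

	/* Don't handle radix mode here, go up to the virtual mode handler */
	if (kvm_is_radix(kvm))
		return H_TOO_HARD;

	/* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
	if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
		      H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
		return H_PARAMETER;

	/* dest (and src if copy_page flag set) must be page aligned */
	if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
		return H_PARAMETER;

	/* Copy or zero the page; copy takes precedence if both are set */
	if (flags & H_COPY_PAGE)
		ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
	else if (flags & H_ZERO_PAGE)
		ret = kvmppc_do_h_page_init_zero(vcpu, dest);

	/* We can ignore the other flags */

	return ret;
}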
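
And since the hcall ABI is only implied by the handler's signature: from
the guest side, H_PAGE_INIT takes the flag word plus the 4K-aligned
destination and source guest real addresses as its three arguments.
Hypothetical guest-side wrappers (the helper names are made up for
illustration; plpar_hcall_norets() is the usual pseries hcall helper and
H_PAGE_INIT/H_ZERO_PAGE/H_COPY_PAGE come from asm/hvcall.h) could look
like:

/* Zero one 4K-aligned guest real page */
static inline long h_zero_page(unsigned long dest)
{
	return plpar_hcall_norets(H_PAGE_INIT, H_ZERO_PAGE, dest, 0);
}

/* Copy one 4K-aligned guest real page from src to dest */
static inline long h_copy_page(unsigned long dest, unsigned long src)
{
	return plpar_hcall_norets(H_PAGE_INIT, H_COPY_PAGE, dest, src);
}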