> -----Original Message----- > From: Wood Scott-B07421 > Sent: Wednesday, July 31, 2013 12:19 AM > To: Bhushan Bharat-R65777 > Cc: Benjamin Herrenschmidt; Alexander Graf; kvm-ppc@xxxxxxxxxxxxxxx; > kvm@xxxxxxxxxxxxxxx; linuxppc-dev@xxxxxxxxxxxxxxxx; Wood Scott-B07421 > Subject: Re: [PATCH 4/4] kvm: powerpc: set cache coherency only for RAM pages > > On 07/30/2013 11:22:54 AM, Bhushan Bharat-R65777 wrote: > > diff --git a/arch/powerpc/kvm/e500_mmu_host.c > > b/arch/powerpc/kvm/e500_mmu_host.c > > index 5cbdc8f..a48c13f 100644 > > --- a/arch/powerpc/kvm/e500_mmu_host.c > > +++ b/arch/powerpc/kvm/e500_mmu_host.c > > @@ -40,6 +40,84 @@ > > > > static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; > > > > +/* > > + * find_linux_pte returns the address of a linux pte for a given > > + * effective address and directory. If not found, it returns zero. > > + */ > > +static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea) { > > + pgd_t *pg; > > + pud_t *pu; > > + pmd_t *pm; > > + pte_t *pt = NULL; > > + > > + pg = pgdir + pgd_index(ea); > > + if (!pgd_none(*pg)) { > > + pu = pud_offset(pg, ea); > > + if (!pud_none(*pu)) { > > + pm = pmd_offset(pu, ea); > > + if (pmd_present(*pm)) > > + pt = pte_offset_kernel(pm, ea); > > + } > > + } > > + return pt; > > +} > > How is this specific to KVM or e500? > > > +#ifdef CONFIG_HUGETLB_PAGE > > +pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, > > + unsigned *shift); #else static > > +inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, > > unsigned long ea, > > + unsigned *shift) { > > + if (shift) > > + *shift = 0; > > + return find_linux_pte(pgdir, ea); } #endif /* > > +!CONFIG_HUGETLB_PAGE */ > > This is already declared in asm/pgtable.h. If we need a non-hugepage > alternative, that should also go in asm/pgtable.h. > > > +/* > > + * Lock and read a linux PTE. If it's present and writable, > > atomically > > + * set dirty and referenced bits and return the PTE, otherwise > > return 0. > > + */ > > +static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int > > writing) > > +{ > > + pte_t pte = pte_val(*p); > > + > > + if (pte_present(pte)) { > > + pte = pte_mkyoung(pte); > > + if (writing && pte_write(pte)) > > + pte = pte_mkdirty(pte); > > + } > > + > > + *p = pte; > > + > > + return pte; > > +} > > + > > +static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva, > > + int writing, unsigned long *pte_sizep) { > > + pte_t *ptep; > > + unsigned long ps = *pte_sizep; > > + unsigned int shift; > > + > > + ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift); > > + if (!ptep) > > + return __pte(0); > > + if (shift) > > + *pte_sizep = 1ul << shift; > > + else > > + *pte_sizep = PAGE_SIZE; > > + > > + if (ps > *pte_sizep) > > + return __pte(0); > > + if (!pte_present(*ptep)) > > + return __pte(0); > > + > > + return kvmppc_read_update_linux_pte(ptep, writing); } > > + > > None of this belongs in this file either. > > > @@ -326,8 +405,8 @@ static void kvmppc_e500_setup_stlbe( > > > > /* Force IPROT=0 for all guest mappings. */ > > stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | > > MAS1_VALID; > > - stlbe->mas2 = (gvaddr & MAS2_EPN) | > > - e500_shadow_mas2_attrib(gtlbe->mas2, pfn); > > + stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & > > E500_TLB_WIMGE_MASK); > > +// e500_shadow_mas2_attrib(gtlbe->mas2, pfn); > > MAS2_E and MAS2_G should be safe to come from the guest. This is handled when setting WIMGE in ref->flags. > > How does this work for TLB1? One ref corresponds to one guest entry, which may > correspond to multiple host entries, potentially each with different WIM > settings. Yes, one ref corresponds to one guest entry. To understand how this will work when a one guest tlb1 entry may maps to many host tlb0/1 entry; on guest tlbwe, KVM setup one guest tlb entry and then pre-map one host tlb entry (out of many) and ref (ref->pfn etc) points to this pre-map entry for that guest entry. Now a guest TLB miss happens which falls on same guest tlb entry and but demands another host tlb entry. In that flow we change/overwrite ref (ref->pfn etc) to point to new host mapping for same guest mapping. > > > stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | > > e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); > > > > @@ -346,6 +425,8 @@ static inline int kvmppc_e500_shadow_map(struct > > kvmppc_vcpu_e500 *vcpu_e500, > > unsigned long hva; > > int pfnmap = 0; > > int tsize = BOOK3E_PAGESZ_4K; > > + pte_t pte; > > + int wimg = 0; > > > > /* > > * Translate guest physical to true physical, acquiring @@ > > -451,6 +532,8 @@ static inline int kvmppc_e500_shadow_map(struct > > kvmppc_vcpu_e500 *vcpu_e500, > > > > if (likely(!pfnmap)) { > > unsigned long tsize_pages = 1 << (tsize + 10 - > > PAGE_SHIFT); > > + pgd_t *pgdir; > > + > > pfn = gfn_to_pfn_memslot(slot, gfn); > > if (is_error_noslot_pfn(pfn)) { > > printk(KERN_ERR "Couldn't get real page for > > gfn %lx!\n", @@ -461,9 +544,15 @@ static inline int > > kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, > > /* Align guest and physical address to page map > > boundaries */ > > pfn &= ~(tsize_pages - 1); > > gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); > > + pgdir = vcpu_e500->vcpu.arch.pgdir; > > + pte = lookup_linux_pte(pgdir, hva, 1, &tsize_pages); > > + if (pte_present(pte)) > > + wimg = (pte >> PTE_WIMGE_SHIFT) & > > MAS2_WIMGE_MASK; > > + else > > + wimg = MAS2_I | MAS2_G; > > If the PTE is not present, then we can't map it, right? Right, we should return error :) -Bharat > So why I+G? > > -Scott -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html