[[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote: > Orabug: 25362942 > > Signed-off-by: Nitin Gupta <nitin.m.gupta@xxxxxxxxxx> If this wasn't an accidental git send-email misfire, then there should be a long log indicating the use case, the performance increase, the testing that was done, etc. etc. Normally I'd not notice but since I was Cc'd I figured it was worth a mention -- for example the vendor ID above doesn't mean a thing to all the rest of us, hence why I suspect it was a git send-email misfire; sadly, I think we've all accidentally done that at least once.... Paul. -- > --- > arch/sparc/include/asm/page_64.h | 3 +- > arch/sparc/include/asm/pgtable_64.h | 5 +++ > arch/sparc/include/asm/tsb.h | 35 +++++++++++++++++- > arch/sparc/kernel/tsb.S | 2 +- > arch/sparc/mm/hugetlbpage.c | 74 ++++++++++++++++++++++++++----------- > arch/sparc/mm/init_64.c | 41 ++++++++++++++++---- > 6 files changed, 128 insertions(+), 32 deletions(-) > > diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h > index 5961b2d..8ee1f97 100644 > --- a/arch/sparc/include/asm/page_64.h > +++ b/arch/sparc/include/asm/page_64.h > @@ -17,6 +17,7 @@ > > #define HPAGE_SHIFT 23 > #define REAL_HPAGE_SHIFT 22 > +#define HPAGE_16GB_SHIFT 34 > #define HPAGE_2GB_SHIFT 31 > #define HPAGE_256MB_SHIFT 28 > #define HPAGE_64K_SHIFT 16 > @@ -28,7 +29,7 @@ > #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) > #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA > #define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) > -#define HUGE_MAX_HSTATE 4 > +#define HUGE_MAX_HSTATE 5 > #endif > > #ifndef __ASSEMBLY__ > diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h > index 6fbd931..2444b02 100644 > --- a/arch/sparc/include/asm/pgtable_64.h > +++ b/arch/sparc/include/asm/pgtable_64.h > @@ -414,6 +414,11 @@ static inline bool is_hugetlb_pmd(pmd_t pmd) > return !!(pmd_val(pmd) & _PAGE_PMD_HUGE); > } > > 
+static inline bool is_hugetlb_pud(pud_t pud) > +{ > + return !!(pud_val(pud) & _PAGE_PUD_HUGE); > +} > + > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > static inline pmd_t pmd_mkhuge(pmd_t pmd) > { > diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h > index 32258e0..fbd8da7 100644 > --- a/arch/sparc/include/asm/tsb.h > +++ b/arch/sparc/include/asm/tsb.h > @@ -195,6 +195,36 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; > nop; \ > 699: > > + /* PUD has been loaded into REG1, interpret the value, seeing > + * if it is a HUGE PUD or a normal one. If it is not valid > + * then jump to FAIL_LABEL. If it is a HUGE PUD, and it > + * translates to a valid PTE, branch to PTE_LABEL. > + * > + * We have to propagate bits [32:22] from the virtual address > + * to resolve at 4M granularity. > + */ > +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) > +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ > + brz,pn REG1, FAIL_LABEL; \ > + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ > + sllx REG2, 32, REG2; \ > + andcc REG1, REG2, %g0; \ > + be,pt %xcc, 700f; \ > + sethi %hi(0x1ffc0000), REG2; \ > + brgez,pn REG1, FAIL_LABEL; \ > + sllx REG2, 1, REG2; \ > + brgez,pn REG1, FAIL_LABEL; \ > + andn REG1, REG2, REG1; \ > + and VADDR, REG2, REG2; \ > + brlz,pt REG1, PTE_LABEL; \ > + or REG1, REG2, REG1; \ > +700: > +#else > +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ > + brz,pn REG1, FAIL_LABEL; \ > + nop; > +#endif > + > /* PMD has been loaded into REG1, interpret the value, seeing > * if it is a HUGE PMD or a normal one. If it is not valid > * then jump to FAIL_LABEL. 
If it is a HUGE PMD, and it > @@ -209,14 +239,14 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; > sethi %uhi(_PAGE_PMD_HUGE), REG2; \ > sllx REG2, 32, REG2; \ > andcc REG1, REG2, %g0; \ > - be,pt %xcc, 700f; \ > + be,pt %xcc, 701f; \ > sethi %hi(4 * 1024 * 1024), REG2; \ > brgez,pn REG1, FAIL_LABEL; \ > andn REG1, REG2, REG1; \ > and VADDR, REG2, REG2; \ > brlz,pt REG1, PTE_LABEL; \ > or REG1, REG2, REG1; \ > -700: > +701: > #else > #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ > brz,pn REG1, FAIL_LABEL; \ > @@ -242,6 +272,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; > srlx REG2, 64 - PAGE_SHIFT, REG2; \ > andn REG2, 0x7, REG2; \ > ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ > + USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ > brz,pn REG1, FAIL_LABEL; \ > sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ > srlx REG2, 64 - PAGE_SHIFT, REG2; \ > diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S > index 10689cf..a0a5a13 100644 > --- a/arch/sparc/kernel/tsb.S > +++ b/arch/sparc/kernel/tsb.S > @@ -117,7 +117,7 @@ tsb_miss_page_table_walk_sun4v_fastpath: > /* Valid PTE is now in %g5. 
*/ > > #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) > - sethi %uhi(_PAGE_PMD_HUGE), %g7 > + sethi %uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7 > sllx %g7, 32, %g7 > > andcc %g5, %g7, %g0 > diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c > index 7c29d38..62c1e62 100644 > --- a/arch/sparc/mm/hugetlbpage.c > +++ b/arch/sparc/mm/hugetlbpage.c > @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift) > pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; > > switch (shift) { > + case HPAGE_16GB_SHIFT: > + hugepage_size = _PAGE_SZ16GB_4V; > + pte_val(entry) |= _PAGE_PUD_HUGE; > + break; > case HPAGE_2GB_SHIFT: > hugepage_size = _PAGE_SZ2GB_4V; > pte_val(entry) |= _PAGE_PMD_HUGE; > @@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry) > unsigned int shift; > > switch (tte_szbits) { > + case _PAGE_SZ16GB_4V: > + shift = HPAGE_16GB_SHIFT; > + break; > case _PAGE_SZ2GB_4V: > shift = HPAGE_2GB_SHIFT; > break; > @@ -263,7 +270,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, > > pgd = pgd_offset(mm, addr); > pud = pud_alloc(mm, pgd, addr); > - if (pud) { > + if (!pud) > + return NULL; > + > + if (sz >= PUD_SIZE) > + pte = (pte_t *)pud; > + else { > pmd = pmd_alloc(mm, pud, addr); > if (!pmd) > return NULL; > @@ -288,12 +300,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) > if (!pgd_none(*pgd)) { > pud = pud_offset(pgd, addr); > if (!pud_none(*pud)) { > - pmd = pmd_offset(pud, addr); > - if (!pmd_none(*pmd)) { > - if (is_hugetlb_pmd(*pmd)) > - pte = (pte_t *)pmd; > - else > - pte = pte_offset_map(pmd, addr); > + if (is_hugetlb_pud(*pud)) > + pte = (pte_t *)pud; > + else { > + pmd = pmd_offset(pud, addr); > + if (!pmd_none(*pmd)) { > + if (is_hugetlb_pmd(*pmd)) > + pte = (pte_t *)pmd; > + else > + pte = pte_offset_map(pmd, addr); > + } > } > } > } > @@ -304,12 +320,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) > void 
set_huge_pte_at(struct mm_struct *mm, unsigned long addr, > pte_t *ptep, pte_t entry) > { > - unsigned int i, nptes, orig_shift, shift; > - unsigned long size; > + unsigned int nptes, orig_shift, shift; > + unsigned long i, size; > pte_t orig; > > size = huge_tte_to_size(entry); > - shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT; > + > + shift = PAGE_SHIFT; > + if (size >= PUD_SIZE) > + shift = PUD_SHIFT; > + else if (size >= PMD_SIZE) > + shift = PMD_SHIFT; > + else > + shift = PAGE_SHIFT; > + > nptes = size >> shift; > > if (!pte_present(*ptep) && pte_present(entry)) > @@ -332,19 +356,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, > pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, > pte_t *ptep) > { > - unsigned int i, nptes, hugepage_shift; > + unsigned int i, nptes, orig_shift, shift; > unsigned long size; > pte_t entry; > > entry = *ptep; > size = huge_tte_to_size(entry); > - if (size >= HPAGE_SIZE) > - nptes = size >> PMD_SHIFT; > + > + shift = PAGE_SHIFT; > + if (size >= PUD_SIZE) > + shift = PUD_SHIFT; > + else if (size >= PMD_SIZE) > + shift = PMD_SHIFT; > else > - nptes = size >> PAGE_SHIFT; > + shift = PAGE_SHIFT; > > - hugepage_shift = pte_none(entry) ? PAGE_SHIFT : > - huge_tte_to_shift(entry); > + nptes = size >> shift; > + orig_shift = pte_none(entry) ? 
PAGE_SHIFT : huge_tte_to_shift(entry); > > if (pte_present(entry)) > mm->context.hugetlb_pte_count -= nptes; > @@ -353,11 +381,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, > for (i = 0; i < nptes; i++) > ptep[i] = __pte(0UL); > > - maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift); > + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift); > /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ > if (size == HPAGE_SIZE) > maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0, > - hugepage_shift); > + orig_shift); > > return entry; > } > @@ -370,7 +398,8 @@ int pmd_huge(pmd_t pmd) > > int pud_huge(pud_t pud) > { > - return 0; > + return !pud_none(pud) && > + (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID; > } > > static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, > @@ -434,8 +463,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, > next = pud_addr_end(addr, end); > if (pud_none_or_clear_bad(pud)) > continue; > - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, > - ceiling); > + if (is_hugetlb_pud(*pud)) > + pud_clear(pud); > + else > + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, > + ceiling); > } while (pud++, addr = next, addr != end); > > start &= PGDIR_MASK; > diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c > index 0cda653..7c0fe73 100644 > --- a/arch/sparc/mm/init_64.c > +++ b/arch/sparc/mm/init_64.c > @@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string) > hugepage_shift = ilog2(hugepage_size); > > switch (hugepage_shift) { > + case HPAGE_16GB_SHIFT: > + hv_pgsz_mask = HV_PGSZ_MASK_16GB; > + hv_pgsz_idx = HV_PGSZ_IDX_16GB; > + break; > case HPAGE_2GB_SHIFT: > hv_pgsz_mask = HV_PGSZ_MASK_2GB; > hv_pgsz_idx = HV_PGSZ_IDX_2GB; > @@ -376,6 +380,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * > { > struct mm_struct *mm; > unsigned long flags; > + bool 
is_huge_tsb; > pte_t pte = *ptep; > > if (tlb_type != hypervisor) { > @@ -393,15 +398,37 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * > > spin_lock_irqsave(&mm->context.lock, flags); > > + is_huge_tsb = false; > #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) > - if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && > - is_hugetlb_pmd(__pmd(pte_val(pte)))) { > - /* We are fabricating 8MB pages using 4MB real hw pages. */ > - pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); > - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, > - address, pte_val(pte)); > - } else > + if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) { > + unsigned long hugepage_size = PAGE_SIZE; > + > + if (is_vm_hugetlb_page(vma)) > + hugepage_size = huge_page_size(hstate_vma(vma)); > + > + if (hugepage_size >= PUD_SIZE) { > + unsigned long mask = 0x1ffc00000UL; > + > + /* Transfer bits [32:22] from address to resolve > + * at 4M granularity. > + */ > + pte_val(pte) &= ~mask; > + pte_val(pte) |= (address & mask); > + } else if (hugepage_size >= PMD_SIZE) { > + /* We are fabricating 8MB pages using 4MB > + * real hw pages. > + */ > + pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); > + } > + > + if (hugepage_size >= PMD_SIZE) { > + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, > + REAL_HPAGE_SHIFT, address, pte_val(pte)); > + is_huge_tsb = true; > + } > + } > #endif > + if (!is_huge_tsb) > __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, > address, pte_val(pte)); > > -- > 2.9.2 > -- To unsubscribe from this list: send the line "unsubscribe sparclinux" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html