Starting with kernel v4.6, book3s page tables store physical addresses in the upper levels of the page table tree. Also on book3s, the page table for 64K page size is switched to 4 levels (pud is no longer folded) to support both hash and radix page tables in a single kernel. In addition, the pmd masked bits value has changed since kernel v4.5 for book3e. This patch makes the corresponding updates here. Signed-off-by: Hari Bathini <hbathini@xxxxxxxxxxxxxxxxxx> --- defs.h | 34 +++++++++++++++++--- ppc64.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 129 insertions(+), 13 deletions(-) diff --git a/defs.h b/defs.h index ce49f18..a56fa65 100644 --- a/defs.h +++ b/defs.h @@ -3812,6 +3812,8 @@ struct efi_memory_desc_t { #define PUD_INDEX_SIZE_L4_4K_3_7 9 #define PTE_RPN_SHIFT_L4_4K 17 #define PTE_RPN_SHIFT_L4_4K_4_5 18 +#define PGD_MASKED_BITS_4K 0 +#define PUD_MASKED_BITS_4K 0 #define PMD_MASKED_BITS_4K 0 /* 64K pagesize */ @@ -3822,15 +3824,30 @@ struct efi_memory_desc_t { #define PTE_INDEX_SIZE_L4_64K_3_10 8 #define PMD_INDEX_SIZE_L4_64K_3_10 10 #define PGD_INDEX_SIZE_L4_64K_3_10 12 +#define PMD_INDEX_SIZE_L4_64K_4_6 5 +#define PUD_INDEX_SIZE_L4_64K_4_6 5 #define PTE_RPN_SHIFT_L4_64K_V1 32 #define PTE_RPN_SHIFT_L4_64K_V2 30 #define PTE_RPN_SHIFT_L4_BOOK3E_64K 28 #define PTE_RPN_SHIFT_L4_BOOK3E_4K 24 +#define PGD_MASKED_BITS_64K 0 +#define PUD_MASKED_BITS_64K 0x1ff #define PMD_MASKED_BITS_64K 0x1ff +#define PMD_MASKED_BITS_64K_3_11 0xfff +#define PMD_MASKED_BITS_BOOK3E_64K_4_5 0x7ff +#define PGD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL +#define PUD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL +#define PMD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL + +#define PTE_RPN_MASK_DEFAULT 0xffffffffffffffffUL +#define PTE_RPN_SIZE_L4_4_6 (PAGESIZE() == PPC64_64K_PAGE_SIZE ?
41 : 45) +#define PTE_RPN_MASK_L4_4_6 (((1UL << PTE_RPN_SIZE_L4_4_6) - 1) << PAGESHIFT()) +#define PTE_RPN_SHIFT_L4_4_6 PAGESHIFT() #define PD_HUGE 0x8000000000000000 #define HUGE_PTE_MASK 0x03 #define HUGEPD_SHIFT_MASK 0x3f +#define HUGEPD_ADDR_MASK (0x0fffffffffffffffUL & ~HUGEPD_SHIFT_MASK) #define PGD_MASK_L4 \ (THIS_KERNEL_VERSION >= LINUX(3,10,0) ? (machdep->ptrs_per_pgd - 1) : 0x1ff) @@ -3855,6 +3872,9 @@ struct efi_memory_desc_t { #define _PAGE_DIRTY (machdep->machspec->_page_dirty) /* C: page changed */ #define _PAGE_ACCESSED (machdep->machspec->_page_accessed) /* R: page referenced */ +#define PTE_RPN_MASK (machdep->machspec->pte_rpn_mask) +#define PTE_RPN_SHIFT (machdep->machspec->pte_rpn_shift) + #define TIF_SIGPENDING (2) #define SWP_TYPE(entry) (((entry) >> 1) & 0x7f) @@ -5690,7 +5710,10 @@ struct machine_specific { uint l1_shift; uint pte_rpn_shift; - uint l2_masked_bits; + ulong pte_rpn_mask; + ulong pgd_masked_bits; + ulong pud_masked_bits; + ulong pmd_masked_bits; int vmemmap_cnt; int vmemmap_psize; @@ -5714,10 +5737,11 @@ void ppc64_init(int); void ppc64_dump_machdep_table(ulong); #define display_idt_table() \ error(FATAL, "-d option is not applicable to PowerPC architecture\n") -#define KSYMS_START (0x1) -#define VM_ORIG (0x2) -#define VMEMMAP_AWARE (0x4) -#define BOOK3E (0x8) +#define KSYMS_START (0x1) +#define VM_ORIG (0x2) +#define VMEMMAP_AWARE (0x4) +#define BOOK3E (0x8) +#define PHYS_ENTRY_L4 (0x10) #define REGION_SHIFT (60UL) #define REGION_ID(addr) (((unsigned long)(addr)) >> REGION_SHIFT) diff --git a/ppc64.c b/ppc64.c index 8733d64..4e18513 100644 --- a/ppc64.c +++ b/ppc64.c @@ -59,6 +59,9 @@ static uint get_ptetype(ulong pte); static int is_hugepage(ulong pte); static int is_hugepd(ulong pte); static ulong hugepage_dir(ulong pte); +static ulong pgd_page_vaddr_l4(ulong pgd); +static ulong pud_page_vaddr_l4(ulong pud); +static ulong pmd_page_vaddr_l4(ulong pmd); static inline uint get_ptetype(ulong pte) { @@ -127,10 +130,57 @@ static 
inline ulong hugepage_dir(ulong pte) if ((machdep->flags & BOOK3E) || (THIS_KERNEL_VERSION < LINUX(3,10,0))) return (ulong)((pte & ~HUGEPD_SHIFT_MASK) | PD_HUGE); - else + else if (machdep->flags & PHYS_ENTRY_L4) + return PTOV(pte & ~HUGEPD_ADDR_MASK); + else /* BOOK3S, kernel v3.10 - v4.4 */ return (ulong)(pte & ~HUGEPD_SHIFT_MASK); } +static inline ulong pgd_page_vaddr_l4(ulong pgd) +{ + ulong pgd_val; + + pgd_val = (pgd & ~machdep->machspec->pgd_masked_bits); + if (machdep->flags & PHYS_ENTRY_L4) { + /* + * physical address is stored starting from kernel v4.6 + */ + pgd_val = PTOV(pgd_val); + } + + return pgd_val; +} + +static inline ulong pud_page_vaddr_l4(ulong pud) +{ + ulong pud_val; + + pud_val = (pud & ~machdep->machspec->pud_masked_bits); + if (machdep->flags & PHYS_ENTRY_L4) { + /* + * physical address is stored starting from kernel v4.6 + */ + pud_val = PTOV(pud_val); + } + + return pud_val; +} + +static inline ulong pmd_page_vaddr_l4(ulong pmd) +{ + ulong pmd_val; + + pmd_val = (pmd & ~machdep->machspec->pmd_masked_bits); + if (machdep->flags & PHYS_ENTRY_L4) { + /* + * physical address is stored starting from kernel v4.6 + */ + pmd_val = PTOV(pmd_val); + } + + return pmd_val; +} + static int book3e_is_kvaddr(ulong addr) { return (addr >= BOOK3E_VMBASE); @@ -322,10 +372,12 @@ ppc64_init(int when) * Starting with kernel v4.6, to accommodate both * radix and hash MMU modes in a single kernel, * _PAGE_PTE & _PAGE_PRESENT page flags are changed. + * Also, page table entries store physical addresses. 
*/ if (THIS_KERNEL_VERSION >= LINUX(4,6,0)) { m->_page_pte = 0x1UL << 62; m->_page_present = 0x1UL << 63; + machdep->flags |= PHYS_ENTRY_L4; } } @@ -346,7 +398,14 @@ ppc64_init(int when) struct machine_specific *m = machdep->machspec; if (machdep->pagesize == 65536) { /* 64K pagesize */ - if (THIS_KERNEL_VERSION >= LINUX(3,10,0)) { + if (!(machdep->flags & BOOK3E) && + (THIS_KERNEL_VERSION >= LINUX(4,6,0))) { + m->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10; + m->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_6; + m->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_6; + m->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10; + + } else if (THIS_KERNEL_VERSION >= LINUX(3,10,0)) { m->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10; m->l2_index_size = PMD_INDEX_SIZE_L4_64K_3_10; m->l3_index_size = PUD_INDEX_SIZE_L4_64K; @@ -358,10 +417,27 @@ ppc64_init(int when) m->l3_index_size = PUD_INDEX_SIZE_L4_64K; m->l4_index_size = PGD_INDEX_SIZE_L4_64K; } + if (!(machdep->flags & BOOK3E)) m->pte_rpn_shift = symbol_exists("demote_segment_4k") ? 
PTE_RPN_SHIFT_L4_64K_V2 : PTE_RPN_SHIFT_L4_64K_V1; - m->l2_masked_bits = PMD_MASKED_BITS_64K; + + if (!(machdep->flags & BOOK3E) && + (THIS_KERNEL_VERSION >= LINUX(4,6,0))) { + m->pgd_masked_bits = PGD_MASKED_BITS_64K_4_6; + m->pud_masked_bits = PUD_MASKED_BITS_64K_4_6; + m->pmd_masked_bits = PMD_MASKED_BITS_64K_4_6; + } else { + m->pgd_masked_bits = PGD_MASKED_BITS_64K; + m->pud_masked_bits = PUD_MASKED_BITS_64K; + if ((machdep->flags & BOOK3E) && + (THIS_KERNEL_VERSION >= LINUX(4,5,0))) + m->pmd_masked_bits = PMD_MASKED_BITS_BOOK3E_64K_4_5; + else if (THIS_KERNEL_VERSION >= LINUX(3,11,0)) + m->pmd_masked_bits = PMD_MASKED_BITS_64K_3_11; + else + m->pmd_masked_bits = PMD_MASKED_BITS_64K; + } } else { /* 4K pagesize */ m->l1_index_size = PTE_INDEX_SIZE_L4_4K; @@ -371,12 +447,22 @@ ppc64_init(int when) else m->l3_index_size = PUD_INDEX_SIZE_L4_4K; m->l4_index_size = PGD_INDEX_SIZE_L4_4K; + if (machdep->flags & BOOK3E) m->pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_4K; else m->pte_rpn_shift = THIS_KERNEL_VERSION >= LINUX(4,5,0) ? 
PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K; - m->l2_masked_bits = PMD_MASKED_BITS_4K; + m->pgd_masked_bits = PGD_MASKED_BITS_4K; + m->pud_masked_bits = PUD_MASKED_BITS_4K; + m->pmd_masked_bits = PMD_MASKED_BITS_4K; + } + + m->pte_rpn_mask = PTE_RPN_MASK_DEFAULT; + if (!(machdep->flags & BOOK3E) && + (THIS_KERNEL_VERSION >= LINUX(4,6,0))) { + m->pte_rpn_mask = PTE_RPN_MASK_L4_4_6; + m->pte_rpn_shift = PTE_RPN_SHIFT_L4_4_6; } /* Compute ptrs per each level */ @@ -645,8 +731,11 @@ ppc64_dump_machdep_table(ulong arg) fprintf(fp, " l3_shift: %d\n", machdep->machspec->l3_shift); fprintf(fp, " l2_shift: %d\n", machdep->machspec->l2_shift); fprintf(fp, " l1_shift: %d\n", machdep->machspec->l1_shift); + fprintf(fp, " pte_rpn_mask: %lx\n", machdep->machspec->pte_rpn_mask); fprintf(fp, " pte_rpn_shift: %d\n", machdep->machspec->pte_rpn_shift); - fprintf(fp, " l2_masked_bits: %x\n", machdep->machspec->l2_masked_bits); + fprintf(fp, " pgd_masked_bits: %lx\n", machdep->machspec->pgd_masked_bits); + fprintf(fp, " pud_masked_bits: %lx\n", machdep->machspec->pud_masked_bits); + fprintf(fp, " pmd_masked_bits: %lx\n", machdep->machspec->pmd_masked_bits); fprintf(fp, " vmemmap_base: "); if (machdep->machspec->vmemmap_base) fprintf(fp, "%lx\n", machdep->machspec->vmemmap_base); @@ -786,6 +875,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose) /* Sometimes we don't have level3 pagetable entries */ if (machdep->machspec->l3_index_size != 0) { + pgd_pte = pgd_page_vaddr_l4(pgd_pte); page_upper = (ulong *)((ulong *)pgd_pte + PUD_OFFSET_L4(vaddr)); FILL_PUD(PAGEBASE(pgd_pte), KVADDR, PAGESIZE()); pud_pte = ULONG(machdep->pud + PAGEOFFSET(page_upper)); @@ -805,6 +895,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose) pud_pte = pgd_pte; } + pud_pte = pud_page_vaddr_l4(pud_pte); page_middle = (ulong *)((ulong *)pud_pte + PMD_OFFSET_L4(vaddr)); FILL_PMD(PAGEBASE(pud_pte), KVADDR, PAGESIZE()); pmd_pte = ULONG(machdep->pmd + 
PAGEOFFSET(page_middle)); @@ -822,7 +913,8 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose) goto out; } - page_table = (ulong *)(pmd_pte & ~(machdep->machspec->l2_masked_bits)) + pmd_pte = pmd_page_vaddr_l4(pmd_pte); + page_table = (ulong *)(pmd_pte) + (BTOP(vaddr) & (machdep->machspec->ptrs_per_l1 - 1)); if (verbose) fprintf(fp, " PMD: %lx => %lx\n",(ulong)page_middle, @@ -867,10 +959,10 @@ out: return FALSE; } - *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_rpn_shift)) + *paddr = PAGEBASE(PTOB((pte & PTE_RPN_MASK) >> PTE_RPN_SHIFT)) + (vaddr & ((1UL << pdshift) - 1)); } else { - *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_rpn_shift)) + *paddr = PAGEBASE(PTOB((pte & PTE_RPN_MASK) >> PTE_RPN_SHIFT)) + PAGEOFFSET(vaddr); } -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility