Hi Dave, Here are the fixes to make crash understand the PPC64 4-level pagetable layout. I am not a PPC64 expert; I coded this up by looking at the ppc64 kernel pagetable handling code. It seems to work fine for the most part, but I do get occasional "user virtual" translation failures. I think I screwed up the PMD calculations, but I am not able to spot the problem. Can someone help? Thanks, Badari elm3b157:~/crash-4.0-2.10.new # ./crash crash 4.0-2.10 Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. Copyright (C) 2004, 2005 IBM Corporation Copyright (C) 1999-2005 Hewlett-Packard Co Copyright (C) 2005 Fujitsu Limited Copyright (C) 2005 NEC Corporation Copyright (C) 1999, 2002 Silicon Graphics, Inc. Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. This program is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Enter "help copying" to see the conditions. This program has absolutely no warranty. Enter "help warranty" for details. GNU gdb 6.1 Copyright 2004 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "powerpc64-unknown-linux-gnu"... 
KERNEL: /usr/src/linux-2.6.14/vmlinux DUMPFILE: /dev/mem CPUS: 4 DATE: Tue Nov 8 11:02:01 2005 UPTIME: 06:41:51 LOAD AVERAGE: 0.08, 0.03, 0.06 TASKS: 103 NODENAME: elm3b157 RELEASE: 2.6.14 VERSION: #1 SMP Thu Nov 3 13:22:57 PST 2005 MACHINE: ppc64 (1655 Mhz) MEMORY: 3.7 GB PID: 32605 COMMAND: "crash" TASK: c0000000e304e040 [THREAD_INFO: c0000000934c8000] CPU: 0 STATE: TASK_RUNNING (ACTIVE) crash> set PID: 32605 COMMAND: "crash" TASK: c0000000e304e040 [THREAD_INFO: c0000000934c8000] CPU: 1 STATE: TASK_RUNNING (ACTIVE) crash> vm PID: 32605 TASK: c0000000e304e040 CPU: 1 COMMAND: "crash" MM PGD RSS TOTAL_VM c0000000dc048080 c000000093e2f000 137464k 76800k VMA START END FLAGS FILE c0000000e3c16ee8 100000 103000 75 c0000000d7b92ee8 10000000 10590000 1875 /root/crash-4.0-2.10.new/crash c0000000d7b92870 1059f000 1069a000 101873 /root/crash-4.0-2.10.new/crash c0000000d7b92420 1069a000 11858000 100073 c0000000e3979cc0 40000000000 4000001d000 875 /lib64/ld-2.3.3.so c0000000e23f57b8 4000001d000 40000022000 100073 c0000000e23f5700 4000002d000 40000030000 100873 /lib64/ld-2.3.3.so c0000000ebf55648 40000048000 400000ba000 75 /lib64/tls/libm.so.6 c0000000e3c162b0 400000ba000 400000c8000 70 /lib64/tls/libm.so.6 c0000000ebf554d8 400000c8000 400000cf000 100073 /lib64/tls/libm.so.6 c0000000ebf55368 400000cf000 40000130000 75 /lib64/libncurses.so.5.4 c0000000ebf55d78 40000130000 4000013f000 70 /lib64/libncurses.so.5.4 c0000000ebf55a98 4000013f000 40000156000 100073 /lib64/libncurses.so.5.4 c0000000ebf55420 40000156000 40000159000 100073 c0000000ebf552b0 40000159000 4000015d000 75 /lib64/libdl.so.2 c0000000ebf55e30 4000015d000 40000169000 70 /lib64/libdl.so.2 c0000000ebf55870 40000169000 4000016d000 100073 /lib64/libdl.so.2 c0000000e3979d78 4000016d000 40000184000 75 /lib64/libz.so.1.2.1 c0000000ebf55590 40000184000 4000018d000 70 /lib64/libz.so.1.2.1 c0000000e39791f8 4000018d000 40000194000 100073 /lib64/libz.so.1.2.1 c0000000ebf557b8 40000194000 40000195000 100073 c0000000e3979590 
40000195000 400002d6000 75 /lib64/tls/libc.so.6 c0000000e39797b8 400002d6000 400002e5000 70 /lib64/tls/libc.so.6 c0000000e23f5870 400002e5000 40000303000 100073 /lib64/tls/libc.so.6 c0000000e3979c08 40000303000 40000307000 100073 c0000000e1308e30 40000307000 4000033a000 71 /usr/lib/locale/en_US.utf8/LC_CTYPE c0000000d7b92700 4000033a000 4000035a000 100073 c0000000eb825b50 4000035a000 40000361000 75 /lib64/tls/libthread_db.so.1 c0000000eb825368 40000361000 4000036a000 70 /lib64/tls/libthread_db.so.1 c0000000eb825870 4000036a000 40000372000 100073 /lib64/tls/libthread_db.so.1 crash> rd -u 100000 100000: 7f454c4602020100 .ELF.... crash> rd -u 40000169000 rd: invalid user virtual address: 40000169000 type: "64-bit UVADDR"
--- crash-4.0-2.10/defs.h 2005-11-07 07:44:06.000000000 -0800 +++ crash-4.0-2.10.new/defs.h 2005-11-08 10:21:05.000000000 -0800 @@ -2100,6 +2100,24 @@ struct efi_memory_desc_t { #define PGD_OFFSET(vaddr) ((vaddr >> PGDIR_SHIFT) & 0x7ff) #define PMD_OFFSET(vaddr) ((vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +/* 4-level page table changes */ +#define PGDIR_SHIFT_L4 (PMD_SHIFT + (machdep->pageshift - 5)) +#define L4_SHIFT (PGDIR_SHIFT_L4 + (machdep->pageshift - 5)) + +#define PTE_SHIFT_L4 17 + +#define PTE_INDEX_SIZE_L4 9 +#define PMD_INDEX_SIZE_L4 7 +#define PGD_INDEX_SIZE_L4 7 + +#define PTRS_PER_PTE_L4 (1 << PTE_INDEX_SIZE_L4) +#define PTRS_PER_PMD_L4 (1 << PMD_INDEX_SIZE_L4) +#define PTRS_PER_PGD_L4 (1 << PGD_INDEX_SIZE_L4) + +#define L4_OFFSET(vaddr) ((vaddr >> L4_SHIFT) & 0x1ff) +#define PGD_OFFSET_L4(vaddr) ((vaddr >> PGDIR_SHIFT_L4) & (PTRS_PER_PGD_L4 - 1)) +#define PMD_OFFSET_L4(vaddr) ((vaddr >> PMD_SHIFT) & (PTRS_PER_PMD_L4 - 1)) + #define _PAGE_PRESENT 0x001UL /* software: pte contains a translation */ #define _PAGE_USER 0x002UL /* matches one of the PP bits */ #define _PAGE_RW 0x004UL /* software: user write access allowed */ @@ -3285,13 +3303,26 @@ struct machine_specific { ulong hwintrstack[NR_CPUS]; char *hwstackbuf; uint hwstacksize; + char *level4; + ulong last_level4_read; }; +#define IS_LAST_L4_READ(l4) ((ulong)(l4) == machdep->machspec->last_level4_read) + +#define FILL_L4(L4, TYPE, SIZE) \ + if (!IS_LAST_L4_READ(L4)) { \ + readmem((ulonglong)((ulong)(L4)), TYPE, machdep->machspec->level4, \ + SIZE, "level4 page", FAULT_ON_ERROR); \ + machdep->machspec->last_level4_read = (ulong)(L4); \ + } + void ppc64_init(int); void ppc64_dump_machdep_table(ulong); #define display_idt_table() \ error(FATAL, "-d option is not applicable to PowerPC architecture\n") #define KSYMS_START (0x1) +#define VM_ORIG (0x2) +#define VM_4_LEVEL (0x4) #endif /* --- crash-4.0-2.10/ppc64.c 2005-11-07 07:44:06.000000000 -0800 +++ crash-4.0-2.10.new/ppc64.c 2005-11-08 
10:56:21.000000000 -0800 @@ -75,15 +75,29 @@ ppc64_init(int when) error(FATAL, "cannot malloc pmd space."); if ((machdep->ptbl = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc ptbl space."); + if ((machdep->machspec->level4 = (char *)malloc(PAGESIZE())) == NULL) + error(FATAL, "cannot malloc level4 space."); machdep->last_pgd_read = 0; machdep->last_pmd_read = 0; machdep->last_ptbl_read = 0; + machdep->machspec->last_level4_read = 0; machdep->verify_paddr = generic_verify_paddr; machdep->ptrs_per_pgd = PTRS_PER_PGD; machdep->flags |= MACHDEP_BT_TEXT; break; case PRE_GDB: + if (!(machdep->flags & (VM_ORIG|VM_4_LEVEL))) { + if (!symbol_exists("pud_clear_bad")) { + machdep->flags |= VM_ORIG; + free(machdep->machspec->level4); + machdep->machspec->level4 = NULL; + machdep->ptrs_per_pgd = PTRS_PER_PGD; + } else { + machdep->flags |= VM_4_LEVEL; + machdep->ptrs_per_pgd = PTRS_PER_PGD_L4; + } + } machdep->kvbase = symbol_value("_stext"); machdep->identity_map_base = machdep->kvbase; machdep->is_kvaddr = generic_is_kvaddr; @@ -340,7 +354,7 @@ ppc64_vtop(ulong vaddr, ulong *pgd, phys if (!(pte & _PAGE_PRESENT)) { if (pte && verbose) { fprintf(fp, "\n"); - ppc64_translate_pte(pte, 0, 0); + ppc64_translate_pte(pte, 0, PTE_SHIFT); } return FALSE; } @@ -352,7 +366,86 @@ ppc64_vtop(ulong vaddr, ulong *pgd, phys if (verbose) { fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); - ppc64_translate_pte(pte, 0, 0); + ppc64_translate_pte(pte, 0, PTE_SHIFT); + } + + return TRUE; +} + +/* + * Virtual to physical memory translation. This function will be called + * by both ppc64_kvtop and ppc64_uvtop. 
+ */ +static int +ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose) +{ + ulong *level4_dir; + ulong *page_dir; + ulong *page_middle; + ulong *page_table; + ulong level4_pte, pgd_pte, pmd_pte; + ulong pte; + + if (verbose) + fprintf(fp, "PAGE DIRECTORY: %lx\n", (ulong)level4); + + level4_dir = (ulong *)((ulong *)level4 + L4_OFFSET(vaddr)); + FILL_L4(PAGEBASE(level4), KVADDR, PAGESIZE()); + level4_pte = ULONG(machdep->machspec->level4 + PAGEOFFSET(level4_dir)); + if (verbose) + fprintf(fp, " L4: %lx => %lx\n", (ulong)level4_dir, level4_pte); + if (!level4_pte) + return FALSE; + + page_dir = (ulong *)((ulong *)level4_pte + PGD_OFFSET_L4(vaddr)); + FILL_PGD(PAGEBASE(level4_pte), KVADDR, PAGESIZE()); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(page_dir)); + + if (verbose) + fprintf(fp, " PGD: %lx => %lx\n", (ulong)page_dir, pgd_pte); + + if (!pgd_pte) + return FALSE; + + page_middle = (ulong *)((ulong *)pgd_pte + PMD_OFFSET_L4(vaddr)); + FILL_PMD(PAGEBASE(pgd_pte), KVADDR, PAGESIZE()); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(page_middle)); + + if (verbose) + fprintf(fp, " PMD: %lx => %lx\n", (ulong)page_middle, pmd_pte); + + if (!(pmd_pte)) + return FALSE; + + page_table = (ulong *)pmd_pte + (BTOP(vaddr) & (PTRS_PER_PTE_L4 - 1)); + if (verbose) + fprintf(fp, " vaddr %lx BTOP %lx PTRS_PER_PTE_L4 %lx\n", vaddr, BTOP(vaddr), PTRS_PER_PTE_L4); + if (verbose) + fprintf(fp, " PMD: %lx => %lx\n",(ulong)page_middle, + (ulong)page_table); + + FILL_PTBL(PAGEBASE(pmd_pte), KVADDR, PAGESIZE()); + pte = ULONG(machdep->ptbl + PAGEOFFSET(page_table)); + + if (verbose) + fprintf(fp, " PTE: %lx => %lx\n", (ulong)page_table, pte); + + if (!(pte & _PAGE_PRESENT)) { + if (pte && verbose) { + fprintf(fp, "\n"); + ppc64_translate_pte(pte, 0, PTE_SHIFT_L4); + } + return FALSE; + } + + if (!pte) + return FALSE; + + *paddr = PAGEBASE(PTOB(pte >> PTE_SHIFT_L4)) + PAGEOFFSET(vaddr); + + if (verbose) { + fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); + 
ppc64_translate_pte(pte, 0, PTE_SHIFT_L4); } return TRUE; @@ -409,7 +502,10 @@ ppc64_uvtop(struct task_context *tc, ulo FAULT_ON_ERROR); } - return ppc64_vtop(vaddr, pgd, paddr, verbose); + if (machdep->flags & VM_4_LEVEL) + return ppc64_vtop_level4(vaddr, pgd, paddr, verbose); + else + return ppc64_vtop(vaddr, pgd, paddr, verbose); } /* @@ -434,7 +530,10 @@ ppc64_kvtop(struct task_context *tc, ulo return TRUE; } - return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); + if (machdep->flags & VM_4_LEVEL) + return ppc64_vtop_level4(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); + else + return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); } /* @@ -655,7 +754,7 @@ ppc64_get_task_pgd(ulong task) * If a physaddr pointer is passed in, don't print anything. */ static int -ppc64_translate_pte(ulong pte, void *physaddr, ulonglong unused) +ppc64_translate_pte(ulong pte, void *physaddr, ulonglong pte_shift) { int c, len1, len2, len3, others, page_present; char buf[BUFSIZE]; @@ -666,7 +765,7 @@ ppc64_translate_pte(ulong pte, void *phy char *arglist[MAXARGS]; ulong paddr; - paddr = PTOB(pte >> PTE_SHIFT); + paddr = PTOB(pte >> pte_shift); page_present = (pte & _PAGE_PRESENT); if (physaddr) {