> > It's kind of a chicken-and-egg situation, where the first readmem()
> needs the virtual address range stuff to be in place, but we need
> something out of the kernel data space to determine the virtual
> address space range data. That being said, it looks possible, at
> least in the case of ppc64, that perhaps we could get away with
> doing a readmem() of a unity-mapped address, since at the
> point where the VM-range decision is made, machdep->kvbase
> has just been set. (Follow the readmem() path, and you'll see
> what I mean...) But you'd have to read raw data and muck
> your way through it because the embedded gdb hasn't been
> invoked yet. Badari had asked the same thing earlier, about
> using the THIS_KERNEL_VERSION macro, but again, the
> underlying crash data to satisfy the macro doesn't get initialized
> until after gdb is alive.

Okay. It looks like we can delay deciding on the 4-level page table layout until the POST_GDB stage. Since THIS_KERNEL_VERSION is set by then, I was able to use that instead :)

Here is the updated version.

Thanks,
Badari
--- crash-4.0-2.10/defs.h 2005-11-07 07:44:06.000000000 -0800 +++ crash-4.0-2.10.new/defs.h 2005-11-09 10:08:59.000000000 -0800 @@ -2100,6 +2100,24 @@ struct efi_memory_desc_t { #define PGD_OFFSET(vaddr) ((vaddr >> PGDIR_SHIFT) & 0x7ff) #define PMD_OFFSET(vaddr) ((vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +/* 4-level page table changes */ +#define PGDIR_SHIFT_L4 (PMD_SHIFT + (machdep->pageshift - 5)) +#define L4_SHIFT (PGDIR_SHIFT_L4 + (machdep->pageshift - 5)) + +#define PTE_SHIFT_L4 17 + +#define PTE_INDEX_SIZE_L4 9 +#define PMD_INDEX_SIZE_L4 7 +#define PGD_INDEX_SIZE_L4 7 + +#define PTRS_PER_PTE_L4 (1 << PTE_INDEX_SIZE_L4) +#define PTRS_PER_PMD_L4 (1 << PMD_INDEX_SIZE_L4) +#define PTRS_PER_PGD_L4 (1 << PGD_INDEX_SIZE_L4) + +#define L4_OFFSET(vaddr) ((vaddr >> L4_SHIFT) & 0x1ff) +#define PGD_OFFSET_L4(vaddr) ((vaddr >> PGDIR_SHIFT_L4) & (PTRS_PER_PGD_L4 - 1)) +#define PMD_OFFSET_L4(vaddr) ((vaddr >> PMD_SHIFT) & (PTRS_PER_PMD_L4 - 1)) + #define _PAGE_PRESENT 0x001UL /* software: pte contains a translation */ #define _PAGE_USER 0x002UL /* matches one of the PP bits */ #define _PAGE_RW 0x004UL /* software: user write access allowed */ @@ -3285,13 +3303,26 @@ struct machine_specific { ulong hwintrstack[NR_CPUS]; char *hwstackbuf; uint hwstacksize; + char *level4; + ulong last_level4_read; }; +#define IS_LAST_L4_READ(l4) ((ulong)(l4) == machdep->machspec->last_level4_read) + +#define FILL_L4(L4, TYPE, SIZE) \ + if (!IS_LAST_L4_READ(L4)) { \ + readmem((ulonglong)((ulong)(L4)), TYPE, machdep->machspec->level4, \ + SIZE, "level4 page", FAULT_ON_ERROR); \ + machdep->machspec->last_level4_read = (ulong)(L4); \ + } + void ppc64_init(int); void ppc64_dump_machdep_table(ulong); #define display_idt_table() \ error(FATAL, "-d option is not applicable to PowerPC architecture\n") #define KSYMS_START (0x1) +#define VM_ORIG (0x2) +#define VM_4_LEVEL (0x4) #endif /* --- crash-4.0-2.10/ppc64.c 2005-11-07 07:44:06.000000000 -0800 +++ crash-4.0-2.10.new/ppc64.c 2005-11-09 
10:07:30.000000000 -0800 @@ -47,6 +47,7 @@ static ulong ppc64_in_irqstack(ulong); static char * ppc64_check_eframe(struct ppc64_pt_regs *); static void ppc64_print_eframe(char *, struct ppc64_pt_regs *, struct bt_info *); +static void parse_cmdline_arg(void); struct machine_specific ppc64_machine_specific = { { 0 }, 0, 0 }; @@ -75,12 +76,17 @@ ppc64_init(int when) error(FATAL, "cannot malloc pmd space."); if ((machdep->ptbl = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc ptbl space."); + if ((machdep->machspec->level4 = (char *)malloc(PAGESIZE())) == NULL) + error(FATAL, "cannot malloc level4 space."); machdep->last_pgd_read = 0; machdep->last_pmd_read = 0; machdep->last_ptbl_read = 0; + machdep->machspec->last_level4_read = 0; machdep->verify_paddr = generic_verify_paddr; machdep->ptrs_per_pgd = PTRS_PER_PGD; machdep->flags |= MACHDEP_BT_TEXT; + if (machdep->cmdline_arg) + parse_cmdline_arg(); break; case PRE_GDB: @@ -109,6 +115,26 @@ ppc64_init(int when) break; case POST_GDB: + if (!(machdep->flags & (VM_ORIG|VM_4_LEVEL))) { + if (THIS_KERNEL_VERSION >= LINUX(2,6,14)) { + machdep->flags |= VM_4_LEVEL; + } else { + machdep->flags |= VM_ORIG; + } + } + switch (machdep->flags & (VM_ORIG|VM_4_LEVEL)) + { + case VM_ORIG: + /* pre-2.6.14 layout */ + free(machdep->machspec->level4); + machdep->machspec->level4 = NULL; + machdep->ptrs_per_pgd = PTRS_PER_PGD; + break; + case VM_4_LEVEL: + /* 2.6.14 layout */ + machdep->ptrs_per_pgd = PTRS_PER_PGD_L4; + break; + } machdep->vmalloc_start = ppc64_vmalloc_start; MEMBER_OFFSET_INIT(thread_struct_pg_tables, "thread_struct", "pg_tables"); @@ -231,6 +257,10 @@ ppc64_dump_machdep_table(ulong arg) fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); if (machdep->flags & MACHDEP_BT_TEXT) fprintf(fp, "%sMACHDEP_BT_TEXT", others++ ? "|" : ""); + if (machdep->flags & VM_ORIG) + fprintf(fp, "%sVM_ORIG", others++ ? "|" : ""); + if (machdep->flags & VM_4_LEVEL) + fprintf(fp, "%sVM_4_LEVEL", others++ ? 
"|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); @@ -340,7 +370,7 @@ ppc64_vtop(ulong vaddr, ulong *pgd, phys if (!(pte & _PAGE_PRESENT)) { if (pte && verbose) { fprintf(fp, "\n"); - ppc64_translate_pte(pte, 0, 0); + ppc64_translate_pte(pte, 0, PTE_SHIFT); } return FALSE; } @@ -352,7 +382,86 @@ ppc64_vtop(ulong vaddr, ulong *pgd, phys if (verbose) { fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); - ppc64_translate_pte(pte, 0, 0); + ppc64_translate_pte(pte, 0, PTE_SHIFT); + } + + return TRUE; +} + +/* + * Virtual to physical memory translation. This function will be called + * by both ppc64_kvtop and ppc64_uvtop. + */ +static int +ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose) +{ + ulong *level4_dir; + ulong *page_dir; + ulong *page_middle; + ulong *page_table; + ulong level4_pte, pgd_pte, pmd_pte; + ulong pte; + + if (verbose) + fprintf(fp, "PAGE DIRECTORY: %lx\n", (ulong)level4); + + level4_dir = (ulong *)((ulong *)level4 + L4_OFFSET(vaddr)); + FILL_L4(PAGEBASE(level4), KVADDR, PAGESIZE()); + level4_pte = ULONG(machdep->machspec->level4 + PAGEOFFSET(level4_dir)); + if (verbose) + fprintf(fp, " L4: %lx => %lx\n", (ulong)level4_dir, level4_pte); + if (!level4_pte) + return FALSE; + + page_dir = (ulong *)((ulong *)level4_pte + PGD_OFFSET_L4(vaddr)); + FILL_PGD(PAGEBASE(level4_pte), KVADDR, PAGESIZE()); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(page_dir)); + + if (verbose) + fprintf(fp, " PGD: %lx => %lx\n", (ulong)page_dir, pgd_pte); + + if (!pgd_pte) + return FALSE; + + page_middle = (ulong *)((ulong *)pgd_pte + PMD_OFFSET_L4(vaddr)); + FILL_PMD(PAGEBASE(pgd_pte), KVADDR, PAGESIZE()); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(page_middle)); + + if (verbose) + fprintf(fp, " PMD: %lx => %lx\n", (ulong)page_middle, pmd_pte); + + if (!(pmd_pte)) + return FALSE; + + page_table = (ulong *)pmd_pte + (BTOP(vaddr) & (PTRS_PER_PTE_L4 - 1)); + if (verbose) + fprintf(fp, " vaddr %lx BTOP %lx PTRS_PER_PTE_L4 
%lx\n", vaddr, BTOP(vaddr), PTRS_PER_PTE_L4); + if (verbose) + fprintf(fp, " PMD: %lx => %lx\n",(ulong)page_middle, + (ulong)page_table); + + FILL_PTBL(PAGEBASE(pmd_pte), KVADDR, PAGESIZE()); + pte = ULONG(machdep->ptbl + PAGEOFFSET(page_table)); + + if (verbose) + fprintf(fp, " PTE: %lx => %lx\n", (ulong)page_table, pte); + + if (!(pte & _PAGE_PRESENT)) { + if (pte && verbose) { + fprintf(fp, "\n"); + ppc64_translate_pte(pte, 0, PTE_SHIFT_L4); + } + return FALSE; + } + + if (!pte) + return FALSE; + + *paddr = PAGEBASE(PTOB(pte >> PTE_SHIFT_L4)) + PAGEOFFSET(vaddr); + + if (verbose) { + fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); + ppc64_translate_pte(pte, 0, PTE_SHIFT_L4); } return TRUE; @@ -409,7 +518,10 @@ ppc64_uvtop(struct task_context *tc, ulo FAULT_ON_ERROR); } - return ppc64_vtop(vaddr, pgd, paddr, verbose); + if (machdep->flags & VM_4_LEVEL) + return ppc64_vtop_level4(vaddr, pgd, paddr, verbose); + else + return ppc64_vtop(vaddr, pgd, paddr, verbose); } /* @@ -434,7 +546,10 @@ ppc64_kvtop(struct task_context *tc, ulo return TRUE; } - return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); + if (machdep->flags & VM_4_LEVEL) + return ppc64_vtop_level4(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); + else + return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); } /* @@ -655,7 +770,7 @@ ppc64_get_task_pgd(ulong task) * If a physaddr pointer is passed in, don't print anything. 
*/ static int -ppc64_translate_pte(ulong pte, void *physaddr, ulonglong unused) +ppc64_translate_pte(ulong pte, void *physaddr, ulonglong pte_shift) { int c, len1, len2, len3, others, page_present; char buf[BUFSIZE]; @@ -666,7 +781,7 @@ ppc64_translate_pte(ulong pte, void *phy char *arglist[MAXARGS]; ulong paddr; - paddr = PTOB(pte >> PTE_SHIFT); + paddr = PTOB(pte >> pte_shift); page_present = (pte & _PAGE_PRESENT); if (physaddr) { @@ -1999,4 +2114,78 @@ ppc64_compiler_warning_stub(void) ppc64_dump_line_number(0); } +/* + * Force the VM address-range selection via: + * + * --machdep vm=orig + * --machdep vm=2.6.14 + */ + +void +parse_cmdline_arg(void) +{ + int i, c, errflag; + char *p; + char buf[BUFSIZE]; + char *arglist[MAXARGS]; + int lines = 0; + + if (!strstr(machdep->cmdline_arg, "=")) { + error(WARNING, "ignoring --machdep option: %s\n\n", + machdep->cmdline_arg); + return; + } + + strcpy(buf, machdep->cmdline_arg); + + for (p = buf; *p; p++) { + if (*p == ',') + *p = ' '; + } + + c = parse_line(buf, arglist); + + for (i = 0; i < c; i++) { + errflag = 0; + + if (STRNEQ(arglist[i], "vm=")) { + p = arglist[i] + strlen("vm="); + if (strlen(p)) { + if (STREQ(p, "orig")) { + machdep->flags |= VM_ORIG; + continue; + } else if (STREQ(p, "2.6.14")) { + machdep->flags |= VM_4_LEVEL; + continue; + } + } + } + + error(WARNING, "ignoring --machdep option: %s\n", arglist[i]); + lines++; + } + + switch (machdep->flags & (VM_ORIG|VM_4_LEVEL)) + { + case VM_ORIG: + error(NOTE, "using original PPC64 VM address ranges\n"); + lines++; + break; + + case VM_4_LEVEL: + error(NOTE, "using 4-level pagetable PPC64 VM address ranges\n"); + lines++; + break; + + case (VM_ORIG|VM_4_LEVEL): + error(WARNING, "cannot set both vm=orig and vm=2.6.14\n"); + lines++; + machdep->flags &= ~(VM_ORIG|VM_4_LEVEL); + break; + } + + if (lines) + fprintf(fp, "\n"); +} + #endif /* PPC64 */