On 11/21/13 at 05:37pm, Hari Bathini wrote: > Makedumpfile fails to filter dump for kernels built with CONFIG_SPARSEMEM_VMEMMAP > enabled as it fails to do vmemmap translations. So far, makedumpfile on ppc64 never > had to deal with vmemmap addresses (vmemmap regions) separately to filter ppc64 > crash dumps as vmemmap regions were mapped in zone normal. But with the inclusion > of CONFIG_SPARSEMEM_VMEMMAP config option in recent kernels, vmemmap memory regions > are mapped outside zone normal. There is a need to handle vmemmap to physical address > translation separately in this scenario. This patch provides support in makedumpfile > tool to do vmemmap to physical address translation when vmemmap regions are mapped > outside zone normal. Some kernel symbols are needed in vmcoreinfo for these changes to > be effective. The kernel patch that adds the necessary symbols to vmcoreinfo has been > posted to linuxppc devel mailing list. This patch is influenced by vmemmap to physical > address translation support code in crash utility. It has been tested successfully > at all dump filtering levels on kernel dumps that have CONFIG_SPARSEMEM_VMEMMAP enabled > and kernel dumps with CONFIG_SPARSEMEM_VMEMMAP disabled as well. Also, successfully > tested dump filtering on already filtered vmcores (re-filtering). The patch applies > cleanly on version 1.5.4 of makedumpfile. > > Changes from v2 to v3: > 1. Changed 'flags' to a more specific name, 'flags_vmemmap' in DumpInfo structure > 2. Freeing vmemmap_buf in get_vmemmap_list_info(), in success scenario as well > > Changes in v2: > 1. Fixed return value when vmemmap list initialization fails > 2. Fixed coding style issue Hi, Hari When I tried to apply your patch to the devel branch, I found several failures, so the patch does not apply. I have a few comments inline that may be helpful for you to rebase your work on top of the devel branch. 
> > Signed-off-by: Onkar N Mahajan <onmahaja at in.ibm.com> > Signed-off-by: Hari Bathini <hbathini at linux.vnet.ibm.com> > --- > arch/ppc64.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > makedumpfile.c | 38 ++++++++++++ > makedumpfile.h | 42 +++++++++++++ > 3 files changed, 251 insertions(+), 6 deletions(-) > > diff --git a/arch/ppc64.c b/arch/ppc64.c > index c229ede..05738e7 100644 > --- a/arch/ppc64.c > +++ b/arch/ppc64.c > @@ -24,6 +24,154 @@ > #include "../elf_info.h" > #include "../makedumpfile.h" > > +/* > + * This function traverses vmemmap list to get the count of vmemmap regions > + * and populates the regions' info in info->vmemmap_list[] > + */ > +static int > +get_vmemmap_list_info(ulong head) > +{ > + int i, cnt; > + long backing_size, virt_addr_offset, phys_offset, list_offset; > + ulong curr, next; > + char *vmemmap_buf = NULL; > + > + backing_size = SIZE(vmemmap_backing); > + virt_addr_offset = OFFSET(vmemmap_backing.virt_addr); > + phys_offset = OFFSET(vmemmap_backing.phys); > + list_offset = OFFSET(vmemmap_backing.list); > + info->vmemmap_list = NULL; > + > + /* > + * Get list count by traversing the vmemmap list > + */ > + cnt = 0; > + curr = head; > + next = 0; > + do { > + if (!readmem(VADDR, (curr + list_offset), &next, > + sizeof(next))) { > + ERRMSG("Can't get vmemmap region addresses\n"); > + goto err; > + } > + curr = next; > + cnt++; > + } while ((next != 0) && (next != head)); > + > + /* > + * Using temporary buffer to save vmemmap region information > + */ > + vmemmap_buf = calloc(1, backing_size); > + if (vmemmap_buf == NULL) { > + ERRMSG("Can't allocate memory for vmemmap_buf. %s\n", > + strerror(errno)); > + goto err; > + } > + > + info->vmemmap_list = calloc(1, cnt * sizeof(struct ppc64_vmemmap)); > + if (info->vmemmap_list == NULL) { > + ERRMSG("Can't allocate memory for vmemmap_list. 
%s\n", > + strerror(errno)); > + goto err; > + } > + > + curr = head; > + for (i = 0; i < cnt; i++) { > + if (!readmem(VADDR, curr, vmemmap_buf, backing_size)) { > + ERRMSG("Can't get vmemmap region info\n"); > + goto err; > + } > + > + info->vmemmap_list[i].phys = ULONG(vmemmap_buf + phys_offset); > + info->vmemmap_list[i].virt = ULONG(vmemmap_buf + > + virt_addr_offset); > + curr = ULONG(vmemmap_buf + list_offset); > + > + if (info->vmemmap_list[i].virt < info->vmemmap_start) > + info->vmemmap_start = info->vmemmap_list[i].virt; > + > + if ((info->vmemmap_list[i].virt + info->vmemmap_psize) > > + info->vmemmap_end) > + info->vmemmap_end = (info->vmemmap_list[i].virt + > + info->vmemmap_psize); > + } > + > + free(vmemmap_buf); > + return cnt; > +err: > + free(vmemmap_buf); > + free(info->vmemmap_list); > + return 0; > +} > + > +/* > + * Verify that the kernel has made the vmemmap list available, > + * and if so, stash the relevant data required to make vtop > + * translations. > + */ > +static int > +ppc64_vmemmap_init(void) > +{ > + int psize, shift; > + ulong head; > + > + if ((SYMBOL(vmemmap_list) == NOT_FOUND_SYMBOL) > + || (SYMBOL(mmu_psize_defs) == NOT_FOUND_SYMBOL) > + || (SYMBOL(mmu_vmemmap_psize) == NOT_FOUND_SYMBOL) > + || (SIZE(vmemmap_backing) == NOT_FOUND_STRUCTURE) > + || (SIZE(mmu_psize_def) == NOT_FOUND_STRUCTURE) > + || (OFFSET(mmu_psize_def.shift) == NOT_FOUND_STRUCTURE) > + || (OFFSET(vmemmap_backing.phys) == NOT_FOUND_STRUCTURE) > + || (OFFSET(vmemmap_backing.virt_addr) == NOT_FOUND_STRUCTURE) > + || (OFFSET(vmemmap_backing.list) == NOT_FOUND_STRUCTURE)) > + return FALSE; > + > + if (!readmem(VADDR, SYMBOL(mmu_vmemmap_psize), &psize, sizeof(int))) > + return FALSE; > + > + if (!readmem(VADDR, SYMBOL(mmu_psize_defs) + > + (SIZE(mmu_psize_def) * psize) + > + OFFSET(mmu_psize_def.shift), &shift, sizeof(int))) > + return FALSE; > + info->vmemmap_psize = 1 << shift; > + > + if (!readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long))) > + 
return FALSE; > + > + /* > + * Get vmemmap list count and populate vmemmap regions info > + */ > + info->vmemmap_cnt = get_vmemmap_list_info(head); > + if (info->vmemmap_cnt == 0) > + return FALSE; > + > + info->flags_vmemmap |= VMEMMAP_AWARE; > + return TRUE; > +} > + > +/* > + * If the vmemmap address translation information is stored in the kernel, > + * make the translation. > + */ > +static unsigned long long > +ppc64_vmemmap_to_phys(unsigned long vaddr) > +{ > + int i; > + ulong offset; > + unsigned long long paddr = NOT_PADDR; > + > + for (i = 0; i < info->vmemmap_cnt; i++) { > + if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr < > + (info->vmemmap_list[i].virt + info->vmemmap_psize))) { > + offset = vaddr - info->vmemmap_list[i].virt; > + paddr = info->vmemmap_list[i].phys + offset; > + break; > + } > + } > + > + return paddr; > +} > + > int > set_ppc64_max_physmem_bits(void) > { > @@ -49,7 +197,7 @@ set_ppc64_max_physmem_bits(void) > int > get_machdep_info_ppc64(void) > { > - unsigned long vmlist, vmalloc_start; > + unsigned long vmlist, vmap_area_list, vmalloc_start; This is already fixed in devel branch: commit 150b58e Author: Baoquan He <bhe at redhat.com> Date: Mon Jul 15 20:37:14 2013 +0800 [PATCH] Add vmap_area_list definition for ppc/ppc64. 
> > info->section_size_bits = _SECTION_SIZE_BITS; > if (!set_ppc64_max_physmem_bits()) { > @@ -103,6 +251,16 @@ get_machdep_info_ppc64(void) > info->vmalloc_start = vmalloc_start; > DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start); > > + if (SYMBOL(vmemmap_list) != NOT_FOUND_SYMBOL) { > + info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT; > + info->vmemmap_end = info->vmemmap_start; > + if (ppc64_vmemmap_init() == FALSE) { > + ERRMSG("Can't get vmemmap list info.\n"); > + return FALSE; > + } > + DEBUG_MSG("vmemmap_start: %lx\n", info->vmemmap_start); > + } > + > return TRUE; > } > > @@ -121,14 +279,23 @@ vaddr_to_paddr_ppc64(unsigned long vaddr) > if (paddr != NOT_PADDR) > return paddr; > > - if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL) > - || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) { > - ERRMSG("Can't get necessary information for vmalloc translation.\n"); > - return NOT_PADDR; > + if ((SYMBOL(vmap_area_list) == NOT_FOUND_SYMBOL) > + || (OFFSET(vmap_area.va_start) == NOT_FOUND_STRUCTURE) > + || (OFFSET(vmap_area.list) == NOT_FOUND_STRUCTURE)) { > + if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL) > + || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) { > + ERRMSG("Can't get info for vmalloc translation.\n"); > + return NOT_PADDR; > + } > } > if (!is_vmalloc_addr_ppc64(vaddr)) > return (vaddr - info->kernel_start); > > + if ((info->flags_vmemmap && VMEMMAP_AWARE) > + && (vaddr >= info->vmemmap_start)) { > + return ppc64_vmemmap_to_phys(vaddr); > + } > + > /* > * TODO: Support vmalloc translation. 
> */ > diff --git a/makedumpfile.c b/makedumpfile.c > index b42565c..5035fce 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -1097,6 +1097,10 @@ get_symbol_info(void) > SYMBOL_ARRAY_LENGTH_INIT(node_remap_start_pfn, > "node_remap_start_pfn"); > > + SYMBOL_INIT(vmemmap_list, "vmemmap_list"); > + SYMBOL_INIT(mmu_psize_defs, "mmu_psize_defs"); > + SYMBOL_INIT(mmu_vmemmap_psize, "mmu_vmemmap_psize"); > + > return TRUE; > } > > @@ -1394,6 +1398,20 @@ get_structure_info(void) > OFFSET_INIT(log.len, "log", "len"); > OFFSET_INIT(log.text_len, "log", "text_len"); > > + /* > + * Get offsets of the vmemmap_backing's members. > + */ > + SIZE_INIT(vmemmap_backing, "vmemmap_backing"); > + OFFSET_INIT(vmemmap_backing.phys, "vmemmap_backing", "phys"); > + OFFSET_INIT(vmemmap_backing.virt_addr, "vmemmap_backing", "virt_addr"); > + OFFSET_INIT(vmemmap_backing.list, "vmemmap_backing", "list"); > + > + /* > + * Get offsets of the mmu_psize_def's members. > + */ > + SIZE_INIT(mmu_psize_def, "mmu_psize_def"); > + OFFSET_INIT(mmu_psize_def.shift, "mmu_psize_def", "shift"); > + > return TRUE; > } > > @@ -1580,6 +1598,9 @@ write_vmcoreinfo_data(void) > WRITE_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr); > WRITE_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr); > WRITE_SYMBOL("node_remap_start_pfn", node_remap_start_pfn); > + WRITE_SYMBOL("vmemmap_list", vmemmap_list); > + WRITE_SYMBOL("mmu_psize_defs", mmu_psize_defs); > + WRITE_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); > > /* > * write the structure size of 1st kernel > @@ -1594,6 +1615,8 @@ write_vmcoreinfo_data(void) > WRITE_STRUCTURE_SIZE("nodemask_t", nodemask_t); > WRITE_STRUCTURE_SIZE("pageflags", pageflags); > WRITE_STRUCTURE_SIZE("log", log); Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg: Understand >= v3.11-rc4 dmesg.)" > + WRITE_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing); > + WRITE_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def); > > /* > * write the member offset of 1st 
kernel > @@ -1631,6 +1654,11 @@ write_vmcoreinfo_data(void) > WRITE_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec); > WRITE_MEMBER_OFFSET("log.len", log.len); > WRITE_MEMBER_OFFSET("log.text_len", log.text_len); Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg: Understand >= v3.11-rc4 dmesg.)" > + WRITE_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys); > + WRITE_MEMBER_OFFSET("vmemmap_backing.virt_addr", > + vmemmap_backing.virt_addr); > + WRITE_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list); > + WRITE_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift); > > if (SYMBOL(node_data) != NOT_FOUND_SYMBOL) > WRITE_ARRAY_LENGTH("node_data", node_data); > @@ -1899,6 +1927,9 @@ read_vmcoreinfo(void) > READ_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr); > READ_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr); > READ_SYMBOL("node_remap_start_pfn", node_remap_start_pfn); > + READ_SYMBOL("vmemmap_list", vmemmap_list); > + READ_SYMBOL("mmu_psize_defs", mmu_psize_defs); > + READ_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); > > READ_STRUCTURE_SIZE("page", page); > READ_STRUCTURE_SIZE("mem_section", mem_section); > @@ -1910,6 +1941,8 @@ read_vmcoreinfo(void) > READ_STRUCTURE_SIZE("nodemask_t", nodemask_t); > READ_STRUCTURE_SIZE("pageflags", pageflags); > READ_STRUCTURE_SIZE("log", log); Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg: Understand >= v3.11-rc4 dmesg.)" > + READ_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing); > + READ_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def); > > READ_MEMBER_OFFSET("page.flags", page.flags); > READ_MEMBER_OFFSET("page._count", page._count); > @@ -1943,6 +1976,11 @@ read_vmcoreinfo(void) > READ_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec); > READ_MEMBER_OFFSET("log.len", log.len); > READ_MEMBER_OFFSET("log.text_len", log.text_len); Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg: Understand >= v3.11-rc4 dmesg.)" > + 
READ_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys); > + READ_MEMBER_OFFSET("vmemmap_backing.virt_addr", > + vmemmap_backing.virt_addr); > + READ_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list); > + READ_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift); > > READ_ARRAY_LENGTH("node_data", node_data); > READ_ARRAY_LENGTH("pgdat_list", pgdat_list); > diff --git a/makedumpfile.h b/makedumpfile.h > index a5826e0..a142243 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -576,6 +576,9 @@ do { \ > #define _SECTION_SIZE_BITS (24) > #define _MAX_PHYSMEM_BITS_ORIG (44) > #define _MAX_PHYSMEM_BITS_3_7 (46) > +#define REGION_SHIFT (60UL) > +#define VMEMMAP_REGION_ID (0xfUL) > +#define VMEMMAP_AWARE (0x4) > #endif > > #ifdef __powerpc32__ > @@ -862,6 +865,11 @@ struct splitting_info { > unsigned long size_eraseinfo; > } splitting_info_t; > > +struct ppc64_vmemmap { > + unsigned long phys; > + unsigned long virt; > +}; > + > struct DumpInfo { > int32_t kernel_version; /* version of first kernel*/ > struct timeval timestamp; > @@ -908,6 +916,14 @@ struct DumpInfo { > unsigned long vmalloc_end; > unsigned long vmemmap_start; > unsigned long vmemmap_end; > + int vmemmap_psize; > + int vmemmap_cnt; > + struct ppc64_vmemmap *vmemmap_list; > + unsigned long flags_vmemmap; > + > + /* > + * for vmemmap > + */ > > /* > * Filter config file containing filter commands to filter out kernel > @@ -1093,7 +1109,6 @@ struct module_info { > struct symbol_info *sym_info; > }; > > - > struct symbol_table { > unsigned long long mem_map; > unsigned long long vmem_map; > @@ -1165,6 +1180,13 @@ struct symbol_table { > unsigned long long __per_cpu_load; > unsigned long long cpu_online_mask; > unsigned long long kexec_crash_image; > + > + /* > + * vmemmap symbols on ppc64 arch > + */ > + unsigned long long vmemmap_list; > + unsigned long long mmu_vmemmap_psize; > + unsigned long long mmu_psize_defs; > }; > > struct size_table { > @@ -1200,6 +1222,12 @@ struct 
size_table { > long elf64_hdr; > long log; Above line was removed in commit a01b663 ("[PATCH v2] dump-dmesg: Understand >= v3.11-rc4 dmesg.)" > > + /* > + * vmemmap symbols on ppc64 arch > + */ > + long vmemmap_backing; > + long mmu_psize_def; > + > long pageflags; > }; > > @@ -1343,6 +1371,18 @@ struct offset_table { > long text_len; > } log; Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg: Understand >= v3.11-rc4 dmesg.)" Thanks WANG Chao > > + /* > + * vmemmap symbols on ppc64 arch > + */ > + struct mmu_psize_def { > + long shift; > + } mmu_psize_def; > + > + struct vmemmap_backing { > + long phys; > + long virt_addr; > + long list; > + } vmemmap_backing; > }; > > /* > > > _______________________________________________ > kexec mailing list > kexec at lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec