Add functions to exclude hugepage from vmcore dump. Signed-off-by: Jingbai Ma <jingbai.ma at hp.com> --- makedumpfile.c | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ makedumpfile.h | 19 ++++ 2 files changed, 289 insertions(+), 2 deletions(-) diff --git a/makedumpfile.c b/makedumpfile.c index b42565c..f0b2531 100644 --- a/makedumpfile.c +++ b/makedumpfile.c @@ -46,6 +46,8 @@ unsigned long long pfn_cache_private; unsigned long long pfn_user; unsigned long long pfn_free; unsigned long long pfn_hwpoison; +unsigned long long pfn_free_huge; +unsigned long long pfn_active_huge; unsigned long long num_dumped; @@ -1038,6 +1040,7 @@ get_symbol_info(void) SYMBOL_INIT(mem_map, "mem_map"); SYMBOL_INIT(vmem_map, "vmem_map"); SYMBOL_INIT(mem_section, "mem_section"); + SYMBOL_INIT(hstates, "hstates"); SYMBOL_INIT(pkmap_count, "pkmap_count"); SYMBOL_INIT_NEXT(pkmap_count_next, "pkmap_count"); SYMBOL_INIT(system_utsname, "system_utsname"); @@ -1174,6 +1177,19 @@ get_structure_info(void) OFFSET_INIT(list_head.prev, "list_head", "prev"); /* + * Get offsets of the hstate's members. + */ + SIZE_INIT(hstate, "hstate"); + OFFSET_INIT(hstate.order, "hstate", "order"); + OFFSET_INIT(hstate.nr_huge_pages, "hstate", "nr_huge_pages"); + OFFSET_INIT(hstate.free_huge_pages, "hstate", "free_huge_pages"); + OFFSET_INIT(hstate.hugepage_activelist, "hstate", + "hugepage_activelist"); + OFFSET_INIT(hstate.hugepage_freelists, "hstate", "hugepage_freelists"); + MEMBER_ARRAY_LENGTH_INIT(hstate.hugepage_freelists, "hstate", + "hugepage_freelists"); + + /* * Get offsets of the node_memblk_s's members. 
*/ SIZE_INIT(node_memblk_s, "node_memblk_s"); @@ -1555,6 +1571,7 @@ write_vmcoreinfo_data(void) WRITE_SYMBOL("mem_map", mem_map); WRITE_SYMBOL("vmem_map", vmem_map); WRITE_SYMBOL("mem_section", mem_section); + WRITE_SYMBOL("hstates", hstates); WRITE_SYMBOL("pkmap_count", pkmap_count); WRITE_SYMBOL("pkmap_count_next", pkmap_count_next); WRITE_SYMBOL("system_utsname", system_utsname); @@ -1590,6 +1607,7 @@ write_vmcoreinfo_data(void) WRITE_STRUCTURE_SIZE("zone", zone); WRITE_STRUCTURE_SIZE("free_area", free_area); WRITE_STRUCTURE_SIZE("list_head", list_head); + WRITE_STRUCTURE_SIZE("hstate", hstate); WRITE_STRUCTURE_SIZE("node_memblk_s", node_memblk_s); WRITE_STRUCTURE_SIZE("nodemask_t", nodemask_t); WRITE_STRUCTURE_SIZE("pageflags", pageflags); @@ -1628,6 +1646,13 @@ write_vmcoreinfo_data(void) WRITE_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr); WRITE_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start); WRITE_MEMBER_OFFSET("vmap_area.list", vmap_area.list); + WRITE_MEMBER_OFFSET("hstate.order", hstate.order); + WRITE_MEMBER_OFFSET("hstate.nr_huge_pages", hstate.nr_huge_pages); + WRITE_MEMBER_OFFSET("hstate.free_huge_pages", hstate.free_huge_pages); + WRITE_MEMBER_OFFSET("hstate.hugepage_activelist", + hstate.hugepage_activelist); + WRITE_MEMBER_OFFSET("hstate.hugepage_freelists", + hstate.hugepage_freelists); WRITE_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec); WRITE_MEMBER_OFFSET("log.len", log.len); WRITE_MEMBER_OFFSET("log.text_len", log.text_len); @@ -1647,6 +1672,9 @@ write_vmcoreinfo_data(void) WRITE_ARRAY_LENGTH("zone.free_area", zone.free_area); WRITE_ARRAY_LENGTH("free_area.free_list", free_area.free_list); + WRITE_ARRAY_LENGTH("hstate.hugepage_freelists", + hstate.hugepage_freelists); + WRITE_NUMBER("NR_FREE_PAGES", NR_FREE_PAGES); WRITE_NUMBER("N_ONLINE", N_ONLINE); @@ -1659,6 +1687,8 @@ write_vmcoreinfo_data(void) WRITE_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE); + WRITE_NUMBER("HUGE_MAX_HSTATE", HUGE_MAX_HSTATE); + /* * write the 
source file of 1st kernel */ @@ -1874,6 +1904,7 @@ read_vmcoreinfo(void) READ_SYMBOL("mem_map", mem_map); READ_SYMBOL("vmem_map", vmem_map); READ_SYMBOL("mem_section", mem_section); + READ_SYMBOL("hstates", hstates); READ_SYMBOL("pkmap_count", pkmap_count); READ_SYMBOL("pkmap_count_next", pkmap_count_next); READ_SYMBOL("system_utsname", system_utsname); @@ -1906,6 +1937,7 @@ read_vmcoreinfo(void) READ_STRUCTURE_SIZE("zone", zone); READ_STRUCTURE_SIZE("free_area", free_area); READ_STRUCTURE_SIZE("list_head", list_head); + READ_STRUCTURE_SIZE("hstate", hstate); READ_STRUCTURE_SIZE("node_memblk_s", node_memblk_s); READ_STRUCTURE_SIZE("nodemask_t", nodemask_t); READ_STRUCTURE_SIZE("pageflags", pageflags); @@ -1940,6 +1972,13 @@ read_vmcoreinfo(void) READ_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr); READ_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start); READ_MEMBER_OFFSET("vmap_area.list", vmap_area.list); + READ_MEMBER_OFFSET("hstate.order", hstate.order); + READ_MEMBER_OFFSET("hstate.nr_huge_pages", hstate.nr_huge_pages); + READ_MEMBER_OFFSET("hstate.free_huge_pages", hstate.free_huge_pages); + READ_MEMBER_OFFSET("hstate.hugepage_activelist", + hstate.hugepage_activelist); + READ_MEMBER_OFFSET("hstate.hugepage_freelists", + hstate.hugepage_freelists); READ_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec); READ_MEMBER_OFFSET("log.len", log.len); READ_MEMBER_OFFSET("log.text_len", log.text_len); @@ -1950,6 +1989,8 @@ read_vmcoreinfo(void) READ_ARRAY_LENGTH("node_memblk", node_memblk); READ_ARRAY_LENGTH("zone.free_area", zone.free_area); READ_ARRAY_LENGTH("free_area.free_list", free_area.free_list); + READ_ARRAY_LENGTH("hstate.hugepage_freelists", + hstate.hugepage_freelists); READ_ARRAY_LENGTH("node_remap_start_pfn", node_remap_start_pfn); READ_NUMBER("NR_FREE_PAGES", NR_FREE_PAGES); @@ -1966,6 +2007,8 @@ read_vmcoreinfo(void) READ_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE); + READ_NUMBER("HUGE_MAX_HSTATE", HUGE_MAX_HSTATE); + return TRUE; } @@ 
-4040,6 +4083,270 @@ exclude_free_page(void)
 	return TRUE;
 }
 
+/*
+ * Clear the 2nd-bitmap bit of every base page that makes up one huge page.
+ *
+ * pfn:   first page frame number of the huge page
+ * order: log2 of the number of base pages in the huge page
+ *
+ * Returns TRUE on success, FALSE if the order cannot be shifted safely or
+ * a bit cannot be cleared.
+ */
+static inline int
+clear_huge_page(unsigned long long pfn, unsigned int order)
+{
+	unsigned long long i, nr_pages;
+
+	/* Shifting by the type width or more is undefined behaviour. */
+	if (order >= sizeof(nr_pages) * 8) {
+		ERRMSG("Invalid huge page order: %u\n", order);
+		return FALSE;
+	}
+	nr_pages = 1ULL << order;
+
+	DEBUG_MSG("Exclude huge page. start pfn: %llu, order: %u\n",
+		pfn, order);
+
+	for (i = 0; i < nr_pages; i++) {
+		if (!clear_bit_on_2nd_bitmap_for_kernel(pfn + i)) {
+			ERRMSG("Can't clear 2nd bitmap! pfn=0x%llx\n", pfn + i);
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * Walk the hugetlb lists of every hstate and clear the huge pages linked
+ * on them from the 2nd bitmap.
+ *
+ * The per-node free lists (hstate.hugepage_freelists[]) are walked when
+ * either DL_EXCLUDE_FREE_HUGE or DL_EXCLUDE_ACTIVE_HUGE is set in
+ * info->dump_level; the active list (hstate.hugepage_activelist) is walked
+ * only when DL_EXCLUDE_ACTIVE_HUGE is set.
+ *
+ * pfn_free_huge and pfn_active_huge are incremented once per huge page,
+ * not once per base page.
+ *
+ * Returns TRUE on success, FALSE if dump memory cannot be read, a list's
+ * back links are inconsistent, or a bitmap bit cannot be cleared.
+ */
+int
+_exclude_huge_page(void)
+{
+	int i, node, freelist_length;
+	unsigned long curr_hstate, curr_page, head, curr, previous, curr_prev;
+	struct timeval tv_start;
+	unsigned long long pfn;
+	unsigned int order;
+	unsigned long nr_huge_pages, free_huge_pages, active_huge_pages;
+
+	freelist_length = ARRAY_LENGTH(hstate.hugepage_freelists);
+
+	/* Exclude free huge pages */
+	if (info->dump_level & (DL_EXCLUDE_FREE_HUGE
+	    | DL_EXCLUDE_ACTIVE_HUGE)) {
+		gettimeofday(&tv_start, NULL);
+		for (i = 0; i < NUMBER(HUGE_MAX_HSTATE); i++) {
+			curr_hstate = SYMBOL(hstates) + SIZE(hstate) * i;
+			/* Read order */
+			if (!readmem(VADDR,
+			    curr_hstate + OFFSET(hstate.order),
+			    &order, sizeof(order))) {
+				ERRMSG("Can't get hstate.order!\n");
+				return FALSE;
+			}
+			/* Read free_huge_pages */
+			if (!readmem(VADDR,
+			    curr_hstate + OFFSET(hstate.free_huge_pages),
+			    &free_huge_pages, sizeof(free_huge_pages))) {
+				ERRMSG("Can't get hstate.free_huge_pages!\n");
+				return FALSE;
+			}
+			for (node = 0; node < freelist_length; node++) {
+				/* head = hstate.hugepage_freelists[node] */
+				head = curr_hstate
+				    + OFFSET(hstate.hugepage_freelists)
+				    + SIZE(list_head) * node;
+				if (!readmem(VADDR,
+				    head + OFFSET(list_head.next),
+				    &curr, sizeof(curr))) {
+					ERRMSG("Can't get free list!\n");
+					return FALSE;
+				}
+				curr_prev = head;
+				/*
+				 * Walk the free list of the node.  A NULL
+				 * next pointer also ends the walk.
+				 */
+				while (head != curr && curr != 0) {
+					print_progress(PROGRESS_FREE_HUGE,
+					    pfn_free_huge, free_huge_pages);
+					if (!readmem(VADDR,
+					    curr + OFFSET(list_head.prev),
+					    &previous, sizeof(previous))) {
+						ERRMSG("Can't get free list!\n");
+						return FALSE;
+					}
+					/* prev must match the last entry */
+					if (previous != curr_prev) {
+						ERRMSG("Free list is broken!\n");
+						return FALSE;
+					}
+					/* entry is the page's lru member */
+					curr_page = curr - OFFSET(page.lru);
+					pfn = page_to_pfn(curr_page);
+					if (!clear_huge_page(pfn, order))
+						return FALSE;
+					pfn_free_huge++;
+					curr_prev = curr;
+					if (!readmem(VADDR,
+					    curr + OFFSET(list_head.next),
+					    &curr, sizeof(curr))) {
+						ERRMSG("Can't get free list!\n");
+						return FALSE;
+					}
+				}
+			}
+		}
+		/*
+		 * print [100 %]
+		 */
+		print_progress(PROGRESS_FREE_HUGE, 1, 1);
+		print_execution_time(PROGRESS_FREE_HUGE, &tv_start);
+	}
+
+	/* Exclude active huge pages */
+	if (info->dump_level & DL_EXCLUDE_ACTIVE_HUGE) {
+		gettimeofday(&tv_start, NULL);
+		for (i = 0; i < NUMBER(HUGE_MAX_HSTATE); i++) {
+			curr_hstate = SYMBOL(hstates) + SIZE(hstate) * i;
+			/* Read order */
+			if (!readmem(VADDR,
+			    curr_hstate + OFFSET(hstate.order),
+			    &order, sizeof(order))) {
+				ERRMSG("Can't get hstate.order!\n");
+				return FALSE;
+			}
+			/* Read nr_huge_pages */
+			if (!readmem(VADDR,
+			    curr_hstate + OFFSET(hstate.nr_huge_pages),
+			    &nr_huge_pages, sizeof(nr_huge_pages))) {
+				ERRMSG("Can't get hstate.nr_huge_pages!\n");
+				return FALSE;
+			}
+			/* Read free_huge_pages */
+			if (!readmem(VADDR,
+			    curr_hstate + OFFSET(hstate.free_huge_pages),
+			    &free_huge_pages, sizeof(free_huge_pages))) {
+				ERRMSG("Can't get hstate.free_huge_pages!\n");
+				return FALSE;
+			}
+			if (nr_huge_pages < free_huge_pages) {
+				ERRMSG("nr_huge_pages < free_huge_pages!\n");
+				return FALSE;
+			}
+			/* Only used as the progress total below */
+			active_huge_pages = nr_huge_pages - free_huge_pages;
+			/* head = hstate.hugepage_activelist */
+			head = curr_hstate + OFFSET(hstate.hugepage_activelist);
+			if (!readmem(VADDR, head + OFFSET(list_head.next),
+			    &curr, sizeof(curr))) {
+				ERRMSG("Can't get active list!\n");
+				/* curr is not valid; don't walk the list */
+				return FALSE;
+			}
+			curr_prev = head;
+			/* Walking active list */
+			while (head != curr && curr != 0) {
+				print_progress(PROGRESS_ACTIVE_HUGE,
+				    pfn_active_huge,
+				    active_huge_pages);
+				if (!readmem(VADDR,
+				    curr + OFFSET(list_head.prev),
+				    &previous, sizeof(previous))) {
+					ERRMSG("Can't get active list!\n");
+					return FALSE;
+				}
+				if (previous != curr_prev) {
+					ERRMSG("Active list is broken!\n");
+					return FALSE;
+				}
+				curr_page = curr - OFFSET(page.lru);
+				pfn = page_to_pfn(curr_page);
+				if (!clear_huge_page(pfn, order))
+					return FALSE;
+				pfn_active_huge++;
+				curr_prev = curr;
+				if (!readmem(VADDR,
+				    curr + OFFSET(list_head.next),
+				    &curr, sizeof(curr))) {
+					ERRMSG("Can't get active list!\n");
+					return FALSE;
+				}
+			}
+		}
+		/*
+		 * print [100 %]
+		 */
+		print_progress(PROGRESS_ACTIVE_HUGE, 1, 1);
+		print_execution_time(PROGRESS_ACTIVE_HUGE, &tv_start);
+	}
+
+	DEBUG_MSG("\n");
+	DEBUG_MSG("free huge pages : %llu\n", pfn_free_huge);
+	DEBUG_MSG("active huge pages: %llu\n", pfn_active_huge);
+
+	return TRUE;
+}
+
+/*
+ * Check that the information needed to walk the hugetlb lists is
+ * available, then exclude huge pages via _exclude_huge_page().
+ *
+ * NOTE(review): NUMBER(HUGE_MAX_HSTATE) is not validated here; confirm
+ * how a dump without that number should be handled.
+ *
+ * Returns TRUE on success, FALSE if a required symbol, size or offset
+ * is missing or if the exclusion itself fails.
+ */
+int
+exclude_huge_page(void)
+{
+	/*
+	 * Check having necessary information.
+	 */
+	if (SYMBOL(hstates) == NOT_FOUND_SYMBOL) {
+		ERRMSG("Can't get necessary symbols for huge pages.\n");
+		return FALSE;
+	}
+
+	if ((SIZE(hstate) == NOT_FOUND_STRUCTURE)
+	    || (SIZE(list_head) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.order) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.nr_huge_pages) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.free_huge_pages) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.hugepage_activelist) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(hstate.hugepage_freelists) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(list_head.next) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(list_head.prev) == NOT_FOUND_STRUCTURE)
+	    || (OFFSET(page.lru) == NOT_FOUND_STRUCTURE)
+	    || (ARRAY_LENGTH(hstate.hugepage_freelists)
+	    == NOT_FOUND_STRUCTURE)) {
+		ERRMSG("Can't get necessary structures for huge pages.\n");
+		return FALSE;
+	}
+
+	/*
+	 * Detect huge pages and update 2nd-bitmap.
+	 */
+	if (!_exclude_huge_page())
+		return FALSE;
+
+	return TRUE;
+}
+
 /*
  * Let C be a cyclic buffer size and B a bitmap size used for
  * representing maximum block size managed by buddy allocator.
@@ -4532,6 +4839,13 @@ exclude_unnecessary_pages_cyclic(void)
 		return FALSE;
 
 	/*
+	 * Exclude huge pages.
+ */ + if (info->dump_level & (DL_EXCLUDE_FREE_HUGE | DL_EXCLUDE_ACTIVE_HUGE)) + if (!exclude_huge_page()) + return FALSE; + + /* * Exclude cache pages, cache private pages, user data pages, * free pages and hwpoison pages. */ @@ -4661,6 +4919,13 @@ create_2nd_bitmap(void) return FALSE; /* + * Exclude huge pages. + */ + if (info->dump_level & (DL_EXCLUDE_FREE_HUGE | DL_EXCLUDE_ACTIVE_HUGE)) + if (!exclude_huge_page()) + return FALSE; + + /* * Exclude Xen user domain. */ if (info->flag_exclude_xen_dom) { @@ -6513,6 +6778,7 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d */ pfn_zero = pfn_cache = pfn_cache_private = 0; pfn_user = pfn_free = pfn_hwpoison = 0; + pfn_free_huge = pfn_active_huge = 0; pfn_memhole = info->max_mapnr; cd_header->offset @@ -7416,7 +7682,8 @@ print_report(void) pfn_original = info->max_mapnr - pfn_memhole; pfn_excluded = pfn_zero + pfn_cache + pfn_cache_private - + pfn_user + pfn_free + pfn_hwpoison; + + pfn_user + pfn_free + pfn_hwpoison + + pfn_free_huge + pfn_active_huge; shrinking = (pfn_original - pfn_excluded) * 100; shrinking = shrinking / pfn_original; @@ -7429,6 +7696,9 @@ print_report(void) pfn_cache_private); REPORT_MSG(" User process data pages : 0x%016llx\n", pfn_user); REPORT_MSG(" Free pages : 0x%016llx\n", pfn_free); + REPORT_MSG(" Free hugepage pages : 0x%016llx\n", pfn_free_huge); + REPORT_MSG(" Active hugepage pages : 0x%016llx\n", + pfn_active_huge); REPORT_MSG(" Hwpoison pages : 0x%016llx\n", pfn_hwpoison); REPORT_MSG(" Remaining pages : 0x%016llx\n", pfn_original - pfn_excluded); diff --git a/makedumpfile.h b/makedumpfile.h index a5826e0..1a0a5fa 100644 --- a/makedumpfile.h +++ b/makedumpfile.h @@ -178,7 +178,7 @@ isAnon(unsigned long mapping) * Dump Level */ #define MIN_DUMP_LEVEL (0) -#define MAX_DUMP_LEVEL (31) +#define MAX_DUMP_LEVEL (127) #define NUM_ARRAY_DUMP_LEVEL (MAX_DUMP_LEVEL + 1) /* enough to allocate all the dump_level */ #define DL_EXCLUDE_ZERO (0x001) /* Exclude Pages 
filled with Zeros */ @@ -189,6 +189,9 @@ isAnon(unsigned long mapping) #define DL_EXCLUDE_USER_DATA (0x008) /* Exclude UserProcessData Pages */ #define DL_EXCLUDE_FREE (0x010) /* Exclude Free Pages */ +#define DL_EXCLUDE_FREE_HUGE (0x020) /* Exclude Free Huge Pages */ +#define DL_EXCLUDE_ACTIVE_HUGE (0x040) /* Exclude Active Huge Pages */ + /* * For parse_line() @@ -1098,6 +1101,7 @@ struct symbol_table { unsigned long long mem_map; unsigned long long vmem_map; unsigned long long mem_section; + unsigned long long hstates; unsigned long long pkmap_count; unsigned long long pkmap_count_next; unsigned long long system_utsname; @@ -1174,6 +1178,7 @@ struct size_table { long zone; long free_area; long list_head; + long hstate; long node_memblk_s; long nodemask_t; @@ -1232,6 +1237,13 @@ struct offset_table { struct free_area { long free_list; } free_area; + struct hstate { + long order; + long nr_huge_pages; + long free_huge_pages; + long hugepage_activelist; + long hugepage_freelists; + } hstate; struct list_head { long next; long prev; @@ -1368,6 +1380,9 @@ struct array_table { struct free_area_at { long free_list; } free_area; + struct hstate_at { + long hugepage_freelists; + } hstate; struct kimage_at { long segment; } kimage; @@ -1388,6 +1403,8 @@ struct number_table { long PG_hwpoison; long PAGE_BUDDY_MAPCOUNT_VALUE; + + long HUGE_MAX_HSTATE; }; struct srcfile_table {