From: Cliff Wickman <cpw@xxxxxxx>

I've been experimenting with asking the kernel to scan the page tables
instead of reading all those page structures through /proc/vmcore.
The results are rather dramatic: on a small, idle UV, about 4 sec.
versus about 40 sec.  On an 8TB UV the scan for unnecessary pages takes
about 4 minutes, versus about 200 minutes through /proc/vmcore.

This patch incorporates the scheme into version 1.5.1, so that the
cyclic processing can use the kernel scans.  It also uses the
page_is_buddy logic to speed up the finding of free pages, and it still
allows makedumpfile to work as before with a kernel that does not
provide /proc/vmcore_pfn_lists.

This patch:
  - writes requests to new kernel file /proc/vmcore_pfn_lists
  - makes request PL_REQUEST_MEMMAP to pass information about the
    boot kernel to the crash kernel
  - makes requests PL_REQUEST_FREE and PL_REQUEST_EXCLUDE, asking the
    kernel to return lists of PFNs (a sketch of the request loop
    follows below)
  - adds page scan timing options -n, -o and -t

The patch [PATCH] makedumpfile: fix to exclude_unnecessary_pages_cyclic
is re-done by the one below, so that patch should not be applied.

This patch depends on a kernel patch, so I'm also sending one that
applies to a 3.0.13 kernel:
  [PATCH] scan page tables for makedumpfile, 3.0.13 kernel

Diffed against makedumpfile-1.5.1

Signed-off-by: Cliff Wickman <cpw at sgi.com>
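For reference, the user side of the interface looks roughly like this
(a sketch only, using the structures defined in makedumpfile.h below
and the pfn_list buffer allocated in setup_kernel_mmap(); error
handling and the one-time PL_REQUEST_MEMMAP setup are omitted):

	struct pfn_list_request request;
	struct pfn_reply reply;
	int i, fd;

	fd = open("/proc/vmcore_pfn_lists", O_WRONLY);

	memset(&request, 0, sizeof(request));
	request.request = PL_REQUEST_FREE;	/* or PL_REQUEST_EXCLUDE */
	request.reply_ptr = (void *)&reply;
	request.pfn_list_ptr = (void *)pfn_list;
	memset(&reply, 0, sizeof(reply));

	do {
		request.more = 0;
		if (reply.more) {
			/* resume where the last request stopped */
			request.more = 1;
			request.zone_index = reply.zone_index;
			request.freearea_index = reply.freearea_index;
			request.type_index = reply.type_index;
			request.list_ct = reply.list_ct;
		}
		/* one write both submits the request and fills the reply */
		if (write(fd, &request, sizeof(request)) != sizeof(request))
			break;
		for (i = 0; i < reply.in_pfn_list; i++) {
			/* pfn_list[i] covers 2^order pages starting at pfn */
		}
	} while (reply.more);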
---
 dwarf_info.c   |    2 
 makedumpfile.c |  429 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 makedumpfile.h |   91 +++++++++++-
 print_info.c   |    5 
 print_info.h   |    3 
 5 files changed, 507 insertions(+), 23 deletions(-)

Index: makedumpfile-1.5.1/makedumpfile.h
===================================================================
--- makedumpfile-1.5.1.orig/makedumpfile.h
+++ makedumpfile-1.5.1/makedumpfile.h
@@ -421,7 +421,8 @@ do { \
 #define KVER_MIN_SHIFT 16
 #define KERNEL_VERSION(x,y,z) (((x) << KVER_MAJ_SHIFT) | ((y) << KVER_MIN_SHIFT) | (z))
 #define OLDEST_VERSION KERNEL_VERSION(2, 6, 15)/* linux-2.6.15 */
-#define LATEST_VERSION KERNEL_VERSION(3, 4, 8)/* linux-3.4.8 */
+//define LATEST_VERSION KERNEL_VERSION(3, 4, 8)/* linux-3.4.8 */
+#define LATEST_VERSION KERNEL_VERSION(3, 7, 8)/* linux-3.7.8 */
 
 /*
  * vmcoreinfo in /proc/vmcore
@@ -797,9 +798,20 @@ typedef struct {
 } xen_crash_info_v2_t;
 
 struct mem_map_data {
+	/*
+	 * pfn_start/pfn_end are the pfn's represented by this mem_map entry.
+	 * mem_map is the virtual address of the array of page structures
+	 * that represent these pages.
+	 * paddr is the physical address of that array of structures.
+	 * ending_paddr would be paddr +
+	 *   (pfn_end - pfn_start) * sizeof(struct page).
+	 * section_vaddr is the address we get from ioremap_cache().
+	 */
 	unsigned long long	pfn_start;
 	unsigned long long	pfn_end;
-	unsigned long	mem_map;
+	unsigned long		mem_map;
+	unsigned long long	paddr;		/* filled in by makedumpfile */
+	unsigned long long	ending_paddr;	/* filled in by kernel */
+	void			*section_vaddr;	/* filled in by kernel */
 };
 
 struct dump_bitmap {
@@ -878,6 +890,7 @@ struct DumpInfo {
 	int		flag_rearrange;      /* flag of creating dumpfile from
						flattened format */
 	int		flag_split;	     /* splitting vmcore */
+	int		flag_use_kernel_lists;
 	int		flag_cyclic;	     /* cyclic processing to keep memory consumption */
 	int		flag_reassemble;     /* reassemble multiple dumpfiles into one */
 	int		flag_refiltering;    /* refilter from kdump-compressed file */
@@ -1393,6 +1406,80 @@ struct domain_list {
 	unsigned int  pickled_id;
 };
 
+#define PL_REQUEST_FREE		1	/* request for a list of free pages */
+#define PL_REQUEST_EXCLUDE	2	/* request for a list of excludable
+					   pages */
+#define PL_REQUEST_MEMMAP	3	/* request to pass in the makedumpfile
+					   mem_map_data table */
+/*
+ * limit the size of the pfn list to this many pfn_element structures
+ */
+#define MAX_PFN_LIST 10000
+
+/*
+ * one element in the pfn_list
+ */
+struct pfn_element {
+	unsigned long pfn;
+	unsigned long order;
+};
+
+/*
+ * a request for finding pfn's that can be excluded from the dump;
+ * they may be pages of particular types or free pages
+ */
+struct pfn_list_request {
+	int request;		/* PL_REQUEST_FREE, PL_REQUEST_EXCLUDE or
+				   PL_REQUEST_MEMMAP */
+	int debug;
+	unsigned long paddr;	/* mem_map address for PL_REQUEST_EXCLUDE */
+	unsigned long pfn_start;/* pfn represented by paddr */
+	unsigned long pgdat_paddr;	/* for PL_REQUEST_FREE */
+	unsigned long pgdat_vaddr;	/* for PL_REQUEST_FREE */
+	int node;		/* for PL_REQUEST_FREE */
+	int exclude_bits;	/* for PL_REQUEST_EXCLUDE */
+	int count;		/* for PL_REQUEST_EXCLUDE */
+	void *reply_ptr;	/* address of user's pfn_reply, for reply */
+	void *pfn_list_ptr;	/* address of user's pfn array (*pfn_list) */
+	int map_count;		/* for PL_REQUEST_MEMMAP; elements */
+	int map_size;		/* for PL_REQUEST_MEMMAP; bytes in table */
+	void *map_ptr;		/* for PL_REQUEST_MEMMAP; address of table */
+	long list_size;		/* for PL_REQUEST_MEMMAP negotiation */
+	/* resume info: */
+	int more;		/* 0 for done, 1 for "there's more" */
+				/* PL_REQUEST_EXCLUDE: */
+	int map_index;		/* slot in the mem_map array of page structs */
+				/* PL_REQUEST_FREE: */
+	int zone_index;		/* zone within the node's pgdat_list */
+	int freearea_index;	/* free_area within the zone */
+	int type_index;		/* free_list within the free_area */
+	int list_ct;		/* page within the list */
+};
+
+/*
+ * the reply from a pfn_list_request;
+ * the list of pfn's itself is pointed to by pfn_list
+ */
+struct pfn_reply {
+	long pfn_list_elements;	/* negotiated on PL_REQUEST_MEMMAP */
+	long in_pfn_list;	/* returned by PL_REQUEST_EXCLUDE and
+				   PL_REQUEST_FREE */
+	/* resume info */
+	int more;		/* 0 == done, 1 == there is more */
+				/* PL_REQUEST_EXCLUDE: */
+	int map_index;		/* slot in the mem_map array of page structs */
+				/* PL_REQUEST_FREE: */
+	int zone_index;		/* zone within the node's pgdat_list */
+	int freearea_index;	/* free_area within the zone */
+	int type_index;		/* free_list within the free_area */
+	int list_ct;		/* page within the list */
+	/* statistic counters: */
+	unsigned long long pfn_cache;		/* PL_REQUEST_EXCLUDE */
+	unsigned long long pfn_cache_private;	/* PL_REQUEST_EXCLUDE */
+	unsigned long long pfn_user;		/* PL_REQUEST_EXCLUDE */
+	unsigned long long pfn_free;		/* PL_REQUEST_FREE */
+};
+
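+/*
+ * Typical sequence: one PL_REQUEST_MEMMAP to pass in the mem_map table
+ * and negotiate the list length, then PL_REQUEST_FREE per node and
+ * PL_REQUEST_EXCLUDE per consolidated mem_map, each repeated while the
+ * reply's "more" flag is set.
+ */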
 #define PAGES_PER_MAPWORD	(sizeof(unsigned long) * 8)
 
 #define MFNS_PER_FRAME		(info->page_size / sizeof(unsigned long))
 
Index: makedumpfile-1.5.1/dwarf_info.c
===================================================================
--- makedumpfile-1.5.1.orig/dwarf_info.c
+++ makedumpfile-1.5.1/dwarf_info.c
@@ -350,6 +350,8 @@ get_data_member_location(Dwarf_Die *die,
 	return TRUE;
 }
 
+int dwarf_formref(Dwarf_Attribute *, Dwarf_Off *);
+
 static int
 get_die_type(Dwarf_Die *die, Dwarf_Die *die_type)
 {
Index: makedumpfile-1.5.1/print_info.c
===================================================================
--- makedumpfile-1.5.1.orig/print_info.c
+++ makedumpfile-1.5.1/print_info.c
@@ -244,6 +244,11 @@ print_usage(void)
 	MSG("  [-f]:\n");
 	MSG("      Overwrite DUMPFILE even if it already exists.\n");
 	MSG("\n");
+	MSG("  [-o]:\n");
+	MSG("      Read page structures from /proc/vmcore in the scan for\n");
+	MSG("      free and excluded pages regardless of whether\n");
+	MSG("      /proc/vmcore_pfn_lists is present.\n");
+	MSG("\n");
 	MSG("  [-h]:\n");
 	MSG("      Show help message and LZO/snappy support status (enabled/disabled).\n");
 	MSG("\n");
Index: makedumpfile-1.5.1/print_info.h
===================================================================
--- makedumpfile-1.5.1.orig/print_info.h
+++ makedumpfile-1.5.1/print_info.h
@@ -43,7 +43,8 @@ void print_execution_time(char *step_nam
  */
 #define MIN_MSG_LEVEL		(0)
 #define MAX_MSG_LEVEL		(31)
-#define DEFAULT_MSG_LEVEL	(7)	/* Print the progress indicator, the
+/* cpw: was 7, but add 0x10 for testing */
+#define DEFAULT_MSG_LEVEL	(23)	/* Print the progress indicator, the
 					   common message, the error message */
 #define ML_PRINT_PROGRESS	(0x001)	/* Print the progress indicator */
 #define ML_PRINT_COMMON_MSG	(0x002)	/* Print the common message */
Index: makedumpfile-1.5.1/makedumpfile.c
===================================================================
--- makedumpfile-1.5.1.orig/makedumpfile.c
+++ makedumpfile-1.5.1/makedumpfile.c
@@ -13,6 +13,8 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
+#define _GNU_SOURCE
+#include <stdio.h>
 #include "makedumpfile.h"
 #include "print_info.h"
 #include "dwarf_info.h"
@@ -31,6 +33,13 @@ struct srcfile_table	srcfile_table;
 struct vm_table		vt = { 0 };
 struct DumpInfo		*info = NULL;
 
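+/*
+ * State for the /proc/vmcore_pfn_lists interface: the open file
+ * descriptor, the list buffer of (pfn, order) elements and its
+ * negotiated length, the -n/-o/-t option flags, and the scan start
+ * time reported by -t.
+ */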
+int pfn_list_fd;
+struct pfn_element *pfn_list;
+int nflag = 0;
+int oflag = 0;
+int tflag = 0;
+struct timeval scan_start;
+int max_pfn_list;
 
 char filename_stdout[] = FILENAME_STDOUT;
 
@@ -2423,6 +2433,9 @@ get_mm_sparsemem(void)
 	unsigned long long pfn_start, pfn_end;
 	unsigned long section, mem_map;
 	unsigned long *mem_sec = NULL;
+	int i;
+	int num_mem_map;
+	struct mem_map_data *mmd;
 
 	int ret = FALSE;
 
@@ -2467,6 +2480,21 @@ get_mm_sparsemem(void)
 		dump_mem_map(pfn_start, pfn_end, mem_map, section_nr);
 	}
 	ret = TRUE;
+
+	/* add paddr to the table */
+	mmd = &info->mem_map_data[0];
+	num_mem_map = info->num_mem_map;
+	for (i = 0; i < num_mem_map; i++) {
+		if (mmd[i].mem_map == 0) {
+			mmd[i].paddr = 0;
+		} else {
+			mmd[i].paddr = vaddr_to_paddr(mmd[i].mem_map);
+			if (mmd[i].paddr == 0)
+				printf("! can't translate %#lx to paddr\n",
+					mmd[i].mem_map);
+		}
+	}
+
 out:
 	if (mem_sec != NULL)
 		free(mem_sec);
@@ -2841,7 +2869,14 @@ out:
 	if (!get_value_for_old_linux())
 		return FALSE;
 
-	if (info->flag_cyclic && (info->dump_level & DL_EXCLUDE_FREE))
+	/*
+	 * page_is_buddy will tell us whether to find free pages
+	 * in a separate pass, whether cyclic or not.
+	 * With non-cyclic -o we always do a separate free pages pass, so
+	 * do not set up page_is_buddy in that case.
+	 */
+	if ((info->flag_cyclic || !oflag) &&
+	    (info->dump_level & DL_EXCLUDE_FREE))
 		setup_page_is_buddy();
 
 	return TRUE;
@@ -3557,6 +3592,65 @@ out:
 	return ret;
 }
 
+/*
+ * let the kernel find excludable pages from one node
+ */
+void
+__exclude_free_pages_kernel(unsigned long pgdat, int node)
+{
+	int i, j, ret, pages;
+	unsigned long pgdat_paddr;
+	struct pfn_list_request request;
+	struct pfn_reply reply;
+	struct pfn_element *pe;
+
+	if ((pgdat_paddr = vaddr_to_paddr(pgdat)) == NOT_PADDR) {
+		ERRMSG("Can't convert virtual address(%#lx) to physical.\n",
+			pgdat);
+		return;
+	}
+
+	/*
+	 * Get the list of free pages.
+	 * This may be broken up into chunks of MAX_PFN_LIST pfn's.
+	 */
+	memset(&request, 0, sizeof(request));
+	request.request = PL_REQUEST_FREE;
+	request.node = node;
+	request.pgdat_paddr = pgdat_paddr;
+	request.pgdat_vaddr = pgdat;
+	request.reply_ptr = (void *)&reply;
+	request.pfn_list_ptr = (void *)pfn_list;
+	memset(&reply, 0, sizeof(reply));
+
+	do {
+		request.more = 0;
+		if (reply.more) {
+			/* this is to be a continuation of the last request */
+			request.more = 1;
+			request.zone_index = reply.zone_index;
+			request.freearea_index = reply.freearea_index;
+			request.type_index = reply.type_index;
+			request.list_ct = reply.list_ct;
+		}
+		ret = write(pfn_list_fd, &request, sizeof(request));
+		if (ret != sizeof(request)) {
+			printf("PL_REQUEST_FREE failed\n");
+			return;
+		}
+		pfn_free += reply.pfn_free;
+
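+		/*
+		 * Expand each element: pe->pfn is the first page frame
+		 * of a buddy block covering 2^pe->order pages.
+		 */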
DEBUG_MSG("Can't select page_is_buddy handler; " @@ -3989,10 +4102,77 @@ exclude_zero_pages(void) return TRUE; } +/* + * let the kernel find excludable pages from one mem_section + */ +int +__exclude_unnecessary_pages_kernel(int mm, struct mem_map_data *mmd) +{ + unsigned long long pfn_start = mmd->pfn_start; + unsigned long long pfn_end = mmd->pfn_end; + int i, j, ret, pages; + struct pfn_list_request request; + struct pfn_reply reply; + struct pfn_element *pe; + + /* + * Get the list of to-be-excluded pages in this section. + * It may be broken up by groups of max_pfn_list size. + */ + memset(&request, 0, sizeof(request)); + request.request = PL_REQUEST_EXCLUDE; + request.paddr = mmd->paddr; /* phys addr of mem_map */ + request.reply_ptr = (void *)&reply; + request.pfn_list_ptr = (void *)pfn_list; + request.exclude_bits = 0; + request.pfn_start = pfn_start; + request.count = pfn_end - pfn_start; + if (info->dump_level & DL_EXCLUDE_CACHE) + request.exclude_bits |= DL_EXCLUDE_CACHE; + if (info->dump_level & DL_EXCLUDE_CACHE_PRI) + request.exclude_bits |= DL_EXCLUDE_CACHE_PRI; + if (info->dump_level & DL_EXCLUDE_USER_DATA) + request.exclude_bits |= DL_EXCLUDE_USER_DATA; + if (info->dump_level & DL_EXCLUDE_FREE) + request.exclude_bits |= DL_EXCLUDE_FREE; + memset(&reply, 0, sizeof(reply)); + + do { + /* pfn represented by paddr */ + request.more = 0; + if (reply.more) { + /* this is to be a continuation of the last request */ + request.more = 1; + request.map_index = reply.map_index; + } + + ret = write(pfn_list_fd, &request, sizeof(request)); + if (ret != sizeof(request)) + return FALSE; + + pfn_cache += reply.pfn_cache; + pfn_cache_private += reply.pfn_cache_private; + pfn_user += reply.pfn_user; + pfn_free += reply.pfn_free; + + for (i = 0; i < reply.in_pfn_list; i++) { + pe = &pfn_list[i]; + pages = (1 << pe->order); + for (j = 0; j < pages; j++) { + clear_bit_on_2nd_bitmap_for_kernel(pe->pfn + j); + } + } + } while (reply.more); + + return TRUE; +} + int -__exclude_unnecessary_pages(unsigned long mem_map, - unsigned long long pfn_start, unsigned long long pfn_end) +__exclude_unnecessary_pages(int mm, struct mem_map_data *mmd) { + unsigned long long pfn_start = mmd->pfn_start; + unsigned long long pfn_end = mmd->pfn_end; + unsigned long mem_map = mmd->mem_map; unsigned long long pfn, pfn_mm, maddr; unsigned long long pfn_read_start, pfn_read_end, index_pg; unsigned char page_cache[SIZE(page) * PGMM_CACHED]; @@ -4000,6 +4180,12 @@ __exclude_unnecessary_pages(unsigned lon unsigned int _count, _mapcount = 0; unsigned long flags, mapping, private = 0; + if (info->flag_use_kernel_lists) { + if (__exclude_unnecessary_pages_kernel(mm, mmd) == FALSE) + return FALSE; + return TRUE; + } + /* * Refresh the buffer of struct page, when changing mem_map. 
+	if (info->dump_level & DL_EXCLUDE_CACHE)
+		request.exclude_bits |= DL_EXCLUDE_CACHE;
+	if (info->dump_level & DL_EXCLUDE_CACHE_PRI)
+		request.exclude_bits |= DL_EXCLUDE_CACHE_PRI;
+	if (info->dump_level & DL_EXCLUDE_USER_DATA)
+		request.exclude_bits |= DL_EXCLUDE_USER_DATA;
+	if (info->dump_level & DL_EXCLUDE_FREE)
+		request.exclude_bits |= DL_EXCLUDE_FREE;
+	memset(&reply, 0, sizeof(reply));
+
+	do {
+		/* pfn represented by paddr */
+		request.more = 0;
+		if (reply.more) {
+			/* this is to be a continuation of the last request */
+			request.more = 1;
+			request.map_index = reply.map_index;
+		}
+
+		ret = write(pfn_list_fd, &request, sizeof(request));
+		if (ret != sizeof(request))
+			return FALSE;
+
+		pfn_cache += reply.pfn_cache;
+		pfn_cache_private += reply.pfn_cache_private;
+		pfn_user += reply.pfn_user;
+		pfn_free += reply.pfn_free;
+
+		for (i = 0; i < reply.in_pfn_list; i++) {
+			pe = &pfn_list[i];
+			pages = (1 << pe->order);
+			for (j = 0; j < pages; j++) {
+				clear_bit_on_2nd_bitmap_for_kernel(pe->pfn + j);
+			}
+		}
+	} while (reply.more);
+
+	return TRUE;
+}
 
 int
-__exclude_unnecessary_pages(unsigned long mem_map,
-    unsigned long long pfn_start, unsigned long long pfn_end)
+__exclude_unnecessary_pages(int mm, struct mem_map_data *mmd)
 {
+	unsigned long long pfn_start = mmd->pfn_start;
+	unsigned long long pfn_end = mmd->pfn_end;
+	unsigned long mem_map = mmd->mem_map;
 	unsigned long long pfn, pfn_mm, maddr;
 	unsigned long long pfn_read_start, pfn_read_end, index_pg;
 	unsigned char page_cache[SIZE(page) * PGMM_CACHED];
@@ -4000,6 +4180,12 @@ __exclude_unnecessary_pages(unsigned lon
 	unsigned int _count, _mapcount = 0;
 	unsigned long flags, mapping, private = 0;
 
+	if (info->flag_use_kernel_lists) {
+		if (__exclude_unnecessary_pages_kernel(mm, mmd) == FALSE)
+			return FALSE;
+		return TRUE;
+	}
+
 	/*
 	 * Refresh the buffer of struct page, when changing mem_map.
 	 */
@@ -4110,19 +4296,175 @@ __exclude_unnecessary_pages(unsigned lon
 	return TRUE;
 }
 
+/*
+ * construct a version of the mem_map_data table to pass to the kernel
+ */
+void *
+make_kernel_mmap(int *kmap_elements, int *kmap_size)
+{
+	int i, j;
+	int elements = 0;
+	int page_structs;
+	int elem;
+	unsigned long base_end_pfn;
+	unsigned long end_paddr;
+	struct mem_map_data *mmdo, *mmdn;
+	struct mem_map_data *mmdbase, *mmdnext, *mmdend, *mmdwork;
+	struct mem_map_data temp_mmd;
+	struct mem_map_data *mmap;
+
+	mmap = malloc(info->num_mem_map * sizeof(struct mem_map_data));
+	if (mmap == NULL) {
+		ERRMSG("Can't allocate memory for kernel map\n");
+		return NULL;
+	}
+	for (i = 0, mmdn = mmap, mmdo = &info->mem_map_data[0];
+	     i < info->num_mem_map; i++, mmdo++) {
+		if (mmdo->mem_map && mmdo->paddr) {
+			*mmdn = *mmdo;
+			mmdn++;
+			elements++;
+		}
+	}
+
+	/* make sure it is sorted by mem_map (it should be already) */
+	mmdn = mmap;
+	for (i = 0; i < elements - 1; i++) {
+		for (j = i + 1; j < elements; j++) {
+			if (mmdn[j].mem_map < mmdn[i].mem_map) {
+				temp_mmd = mmdn[j];
+				mmdn[j] = mmdn[i];
+				mmdn[i] = temp_mmd;
+			}
+		}
+	}
+
+	/*
+	 * Consolidate mem_map's that represent consecutive pfn's and
+	 * whose page structures occupy consecutive physical addresses,
+	 * e.g. on a test system:
+	 *   pfns 0x1000000-0x1008000 mem_map 0xffffea0038000000 paddr 0x11f7e00000
+	 *   pfns 0x1008000-0x1010000 mem_map 0xffffea00381c0000 paddr 0x11f7fc0000
+	 *   pfns 0x1010000-0x1018000 mem_map 0xffffea0038380000 paddr 0x11f8180000
+	 * Each section spans 0x8000 pfn's (128M of memory, so 0x8000
+	 * page structs), and each paddr increment of 0x1c0000 is exactly
+	 * 0x8000 * sizeof(struct page), so these three can be merged
+	 * into one entry.
+	 */
+	mmdbase = mmap;
+	mmdnext = mmap + 1;
+	mmdend = mmap + elements;
+	while (mmdnext < mmdend) {
+		elem = mmdend - mmdnext;
+		/* test mmdbase vs. mmdwork and onward: */
+		for (i = 0, mmdwork = mmdnext; i < elem; i++, mmdwork++) {
+			base_end_pfn = mmdbase->pfn_end;
+			if (base_end_pfn == mmdwork->pfn_start) {
+				page_structs = (mmdbase->pfn_end -
+						mmdbase->pfn_start);
+				end_paddr = (page_structs * SIZE(page)) +
+						mmdbase->paddr;
+				if (mmdwork->paddr == end_paddr) {
+					/* extend base by the work one */
+					mmdbase->pfn_end = mmdwork->pfn_end;
+					/* next is where to begin next time */
+					mmdnext = mmdwork + 1;
+				} else {
+					/* gap in address of page
+					   structs; end of section */
+					mmdbase++;
+					if (mmdwork - mmdbase > 0)
+						*mmdbase = *mmdwork;
+					mmdnext = mmdwork + 1;
+					break;
+				}
+			} else {
+				/* gap in pfns; end of section */
+				mmdbase++;
+				if (mmdwork - mmdbase > 0)
+					*mmdbase = *mmdwork;
+				mmdnext = mmdwork + 1;
+				break;
+			}
+		}
+	}
+	elements = (mmdbase - mmap) + 1;
+	*kmap_elements = elements;
+	*kmap_size = elements * sizeof(struct mem_map_data);
+	return mmap;
+}
+
+/*
+ * Pass in the mem_map_data table.
+ * Must do this once, and before doing PL_REQUEST_FREE or
+ * PL_REQUEST_EXCLUDE.
+ */
+int
+setup_kernel_mmap()
+{
+	int ret;
+	int kmap_elements, kmap_size;
+	long malloc_size;
+	void *kmap_addr;
+	struct pfn_list_request request;
+	struct pfn_reply reply;
+
+	kmap_addr = make_kernel_mmap(&kmap_elements, &kmap_size);
+	if (kmap_addr == NULL)
+		return FALSE;
+	memset(&request, 0, sizeof(request));
+	request.request = PL_REQUEST_MEMMAP;
+	request.map_ptr = kmap_addr;
+	request.reply_ptr = (void *)&reply;
+	request.map_count = kmap_elements;
+	request.map_size = kmap_size;
+	request.list_size = MAX_PFN_LIST;
+
+	ret = write(pfn_list_fd, &request, sizeof(request));
+	if (ret < 0) {
+		fprintf(stderr, "PL_REQUEST_MEMMAP returned %d\n", ret);
+		return FALSE;
+	}
+	/* the reply tells us how long the kernel's list actually is */
+	max_pfn_list = reply.pfn_list_elements;
+	if (max_pfn_list <= 0) {
+		fprintf(stderr,
+			"PL_REQUEST_MEMMAP returned max_pfn_list %d\n",
+			max_pfn_list);
+		return FALSE;
+	}
+	if (max_pfn_list < MAX_PFN_LIST) {
+		printf("length of pfn list dropped from %d to %d\n",
+			MAX_PFN_LIST, max_pfn_list);
+	}
+	free(kmap_addr);
+	/*
+	 * Allocate the buffer for the PFN list (just once).
+	 */
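+	/*
+	 * With the default MAX_PFN_LIST of 10000 and a 16-byte
+	 * pfn_element (two unsigned longs on a 64-bit kernel), this
+	 * buffer is about 160KB.
+	 */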
+	malloc_size = max_pfn_list * sizeof(struct pfn_element);
+	if ((pfn_list = (struct pfn_element *)malloc(malloc_size)) == NULL) {
+		ERRMSG("Can't allocate pfn_list of %ld\n", malloc_size);
+		return FALSE;
+	}
+	return TRUE;
+}
+
 int
 exclude_unnecessary_pages(void)
 {
-	unsigned int mm;
-	struct mem_map_data *mmd;
-	struct timeval tv_start;
+	unsigned int		mm;
+	struct mem_map_data	*mmd;
+	struct timeval		tv_start;
 
 	if (is_xen_memory() && !info->dom0_mapnr) {
 		ERRMSG("Can't get max domain-0 PFN for excluding pages.\n");
 		return FALSE;
 	}
+	if (!info->flag_cyclic && info->flag_use_kernel_lists) {
+		if (setup_kernel_mmap() == FALSE)
+			return FALSE;
+	}
 
 	gettimeofday(&tv_start, NULL);
+	gettimeofday(&scan_start, NULL);
 
 	for (mm = 0; mm < info->num_mem_map; mm++) {
 		print_progress(PROGRESS_UNN_PAGES, mm, info->num_mem_map);
@@ -4131,9 +4473,9 @@ exclude_unnecessary_pages(void)
 
 		if (mmd->mem_map == NOT_MEMMAP_ADDR)
 			continue;
-
-		if (!__exclude_unnecessary_pages(mmd->mem_map,
-						 mmd->pfn_start, mmd->pfn_end))
+		if (mmd->paddr == 0)
+			continue;
+		if (!__exclude_unnecessary_pages(mm, mmd))
 			return FALSE;
 	}
 
@@ -4187,9 +4529,10 @@ exclude_unnecessary_pages_cyclic(void)
 
 		if (mmd->mem_map == NOT_MEMMAP_ADDR)
 			continue;
 
-		if (mmd->pfn_end >= info->cyclic_start_pfn || mmd->pfn_start <= info->cyclic_end_pfn) {
-			if (!__exclude_unnecessary_pages(mmd->mem_map,
-					mmd->pfn_start, mmd->pfn_end))
+		if (mmd->pfn_end >= info->cyclic_start_pfn &&
+		    mmd->pfn_start <= info->cyclic_end_pfn) {
+			if (__exclude_unnecessary_pages(mm, mmd)
+			    == FALSE)
 				return FALSE;
 		}
 	}
@@ -4219,7 +4562,7 @@ update_cyclic_region(unsigned long long
 	if (!create_1st_bitmap_cyclic())
 		return FALSE;
 
-	if (!exclude_unnecessary_pages_cyclic())
+	if (exclude_unnecessary_pages_cyclic() == FALSE)
 		return FALSE;
 
 	return TRUE;
@@ -4279,16 +4622,17 @@ create_2nd_bitmap(void)
 	if (info->dump_level & DL_EXCLUDE_CACHE ||
 	    info->dump_level & DL_EXCLUDE_CACHE_PRI ||
 	    info->dump_level & DL_EXCLUDE_USER_DATA) {
-		if (!exclude_unnecessary_pages()) {
+		if (exclude_unnecessary_pages() == FALSE) {
 			ERRMSG("Can't exclude unnecessary pages.\n");
 			return FALSE;
 		}
 	}
 
 	/*
-	 * Exclude free pages.
+	 * Exclude free pages.  (No separate pass is needed if pages
+	 * can be identified as part of the buddy system.)
 	 */
-	if (info->dump_level & DL_EXCLUDE_FREE)
+	if ((info->dump_level & DL_EXCLUDE_FREE) && !info->page_is_buddy)
 		if (!exclude_free_page())
 			return FALSE;
 
@@ -4419,6 +4763,10 @@ create_dump_bitmap(void)
 	int ret = FALSE;
 
 	if (info->flag_cyclic) {
+		if (info->flag_use_kernel_lists) {
+			if (setup_kernel_mmap() == FALSE)
+				goto out;
+		}
 		if (!prepare_bitmap_buffer_cyclic())
 			goto out;
 
@@ -4896,6 +5244,7 @@ get_num_dumpable_cyclic(void)
 {
 	unsigned long long pfn, num_dumpable=0;
 
+	gettimeofday(&scan_start, NULL);
 	for (pfn = 0; pfn < info->max_mapnr; pfn++) {
 		if (!update_cyclic_region(pfn))
 			return FALSE;
@@ -5225,7 +5574,7 @@ get_loads_dumpfile_cyclic(void)
 	info->cyclic_end_pfn = info->pfn_cyclic;
 	if (!create_1st_bitmap_cyclic())
 		return FALSE;
-	if (!exclude_unnecessary_pages_cyclic())
+	if (exclude_unnecessary_pages_cyclic() == FALSE)
 		return FALSE;
 
 	if (!(phnum = get_phnum_memory()))
@@ -5792,6 +6141,7 @@ write_kdump_pages_cyclic(struct cache_da
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 
 		if ((num_dumped % per) == 0)
+
 			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
 
 		/*
@@ -6232,6 +6582,8 @@ write_kdump_pages_and_bitmap_cyclic(stru
 		if (!update_cyclic_region(pfn))
 			return FALSE;
 
+	if (tflag)
+		print_execution_time("Total time", &scan_start);
 	if (!write_kdump_pages_cyclic(cd_header, cd_page, &pd_zero,
 				      &offset_data))
 		return FALSE;
@@ -7365,6 +7717,11 @@ retry:
 		if ((status = writeout_multiple_dumpfiles()) == FALSE)
 			return FALSE;
 	} else {
+		if (nflag) { /* a bit too early for the cyclic case */
+			printf("\n");
+			print_report();
+			return TRUE;
+		}
 		if ((status = writeout_dumpfile()) == FALSE)
 			return FALSE;
 	}
@@ -8257,6 +8614,22 @@ static struct option longopts[] = {
 	{0, 0, 0, 0}
 };
 
+/*
+ * test for the presence of the capability in the kernel to provide
+ * lists of pfn's: /proc/vmcore_pfn_lists
+ * return 1 for present
+ * return 0 for not present
+ */
+int
+test_kernel_pfn_lists(void)
+{
+	if ((pfn_list_fd = open("/proc/vmcore_pfn_lists", O_WRONLY)) < 0) {
+		return 0;
+	}
+	return 1;
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -8282,7 +8655,7 @@ main(int argc, char *argv[])
 	info->block_order = DEFAULT_ORDER;
 	message_level = DEFAULT_MSG_LEVEL;
 
-	while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:lMpRrsvXx:", longopts,
+	while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:lMnopRrstVvXx:Y", longopts,
 		NULL)) != -1) {
 		switch (opt) {
 		case 'b':
@@ -8340,6 +8713,13 @@ main(int argc, char *argv[])
 		case 'M':
 			info->flag_dmesg = 1;
 			break;
+		case 'n':
+			/* -n undocumented; for testing page scanning time */
+			nflag = 1;
+			break;
+		case 'o':
+			oflag = 1;
+			break;
 		case 'p':
 			info->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
 			break;
@@ -8358,6 +8738,9 @@ main(int argc, char *argv[])
 		case 'r':
 			info->flag_reassemble = 1;
 			break;
+		case 't':
+			tflag = 1;
+			break;
 		case 'V':
 			info->vaddr_for_vtop = strtoul(optarg, NULL, 0);
 			break;
@@ -8389,6 +8772,12 @@ main(int argc, char *argv[])
 			goto out;
 		}
 	}
+
+	if (oflag)
+		info->flag_use_kernel_lists = 0;
+	else
+		info->flag_use_kernel_lists = test_kernel_pfn_lists();
+
 	if (flag_debug)
 		message_level |= ML_PRINT_DEBUG_MSG;