>On 09/11/14 at 08:52am, Atsushi Kumagai wrote: >> >Hi Atsushi, >> > >> >Since huge pages are included in user pages, I can't think of a way to >> >make test cases for huge page exclusion. Could you give some suggestions >> >on this or how did you test it? >> >> Before I posted this patch, I tested as below. >> This idea came from the fact that old makedumpfile can't exclude >> huge pages except the first page(PG_head). >> >> 1. Get the number of hugepages from /proc/meminfo >> 2. Calculate the number of PG_tail pages >> 3. Capture the dumpfile without filtering >> 4. Run makedumpfile and compare the report message between v1.5.6 >> and v1.5.7(rc) to get how many user pages become excludable with >> this patch. >> 5. The results of Step 2 and Step 4 must be the same; confirm it. >> >> The way above is for THP but you can apply it also for hugetlbfs >> if you take into account that old makedumpfile can't exclude >> *any* hugetlbfs pages. >But THP pages are also anonymous pages; doesn't it do the same for THP between >1.5.6 and 1.5.7? Only a PG_head page is marked as an anonymous page; makedumpfile doesn't distinguish PG_tail pages as anonymous pages. Please see below. do_huge_pmd_anonymous_page() + __do_huge_pmd_anonymous_page() + page_add_new_anon_rmap() + __page_set_anon_rmap() Thanks Atsushi Kumagai >> >> I recommend separating the two cases completely by enabling either >> THP or hugetlbfs explicitly since it's easier to confirm the results. >For hugetlbfs, this works; I will try this. >> >> >> Thanks >> Atsushi Kumagai >> >> > >> >Thanks >> >Baoquan >> > >> > >> >On 08/20/14 at 07:27am, Atsushi Kumagai wrote: >> >> There are 2 types of hugepages in the kernel; both should be >> >> excluded as user pages. >> >> >> >> 1. Transparent huge pages (THP) >> >> All the pages are anonymous pages (at least for now), so we should >> >> just get how many pages are in the corresponding hugepage. 
>> >> It can be gotten from the page->lru.prev of the second page in the >> >> hugepage. >> >> >> >> 2. Hugetlbfs pages >> >> The pages aren't anonymous pages but kind of user pages, we should >> >> exclude also these pages in any way. >> >> Luckily, it's possible to detect these pages by looking the >> >> page->lru.next of the second page in the hugepage. This idea came >> >> from the kernel's PageHuge(). >> >> The number of pages can be gotten in the same way as THP. >> >> >> >> Changelog: >> >> v4: >> >> - Cleaned up according to Petr's and Baoquan's comments. >> >> v3: >> >> - Cleaned up according to Petr's comments. >> >> - Fix misdetection of hugetlb pages. >> >> v2: >> >> - Rebased to "Generic multi-page exclusion". >> >> >> >> Signed-off-by: Atsushi Kumagai <kumagai-atsushi at mxc.nes.nec.co.jp> >> >> --- >> >> makedumpfile.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++---------- >> >> makedumpfile.h | 7 +++++ >> >> 2 files changed, 78 insertions(+), 15 deletions(-) >> >> >> >> diff --git a/makedumpfile.c b/makedumpfile.c >> >> index 11cd473..b4b6eca 100644 >> >> --- a/makedumpfile.c >> >> +++ b/makedumpfile.c >> >> @@ -1180,6 +1180,7 @@ get_symbol_info(void) >> >> SYMBOL_INIT(vmemmap_list, "vmemmap_list"); >> >> SYMBOL_INIT(mmu_psize_defs, "mmu_psize_defs"); >> >> SYMBOL_INIT(mmu_vmemmap_psize, "mmu_vmemmap_psize"); >> >> + SYMBOL_INIT(free_huge_page, "free_huge_page"); >> >> >> >> SYMBOL_INIT(cpu_pgd, "cpu_pgd"); >> >> SYMBOL_INIT(demote_segment_4k, "demote_segment_4k"); >> >> @@ -1296,6 +1297,15 @@ get_structure_info(void) >> >> ENUM_NUMBER_INIT(PG_slab, "PG_slab"); >> >> ENUM_NUMBER_INIT(PG_hwpoison, "PG_hwpoison"); >> >> >> >> + ENUM_NUMBER_INIT(PG_head_mask, "PG_head_mask"); >> >> + if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER) { >> >> + ENUM_NUMBER_INIT(PG_head, "PG_head"); >> >> + if (NUMBER(PG_head) == NOT_FOUND_NUMBER) >> >> + ENUM_NUMBER_INIT(PG_head, "PG_compound"); >> >> + if (NUMBER(PG_head) != NOT_FOUND_NUMBER) >> >> + 
NUMBER(PG_head_mask) = 1UL << NUMBER(PG_head); >> >> + } >> >> + >> >> ENUM_TYPE_SIZE_INIT(pageflags, "pageflags"); >> >> >> >> TYPEDEF_SIZE_INIT(nodemask_t, "nodemask_t"); >> >> @@ -1530,6 +1540,9 @@ get_value_for_old_linux(void) >> >> NUMBER(PG_swapcache) = PG_swapcache_ORIGINAL; >> >> if (NUMBER(PG_slab) == NOT_FOUND_NUMBER) >> >> NUMBER(PG_slab) = PG_slab_ORIGINAL; >> >> + if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER) >> >> + NUMBER(PG_head_mask) = 1L << PG_compound_ORIGINAL; >> >> + >> >> /* >> >> * The values from here are for free page filtering based on >> >> * mem_map array. These are minimum effort to cover old >> >> @@ -1699,6 +1712,7 @@ write_vmcoreinfo_data(void) >> >> WRITE_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); >> >> WRITE_SYMBOL("cpu_pgd", cpu_pgd); >> >> WRITE_SYMBOL("demote_segment_4k", demote_segment_4k); >> >> + WRITE_SYMBOL("free_huge_page", free_huge_page); >> >> >> >> /* >> >> * write the structure size of 1st kernel >> >> @@ -1788,6 +1802,7 @@ write_vmcoreinfo_data(void) >> >> >> >> WRITE_NUMBER("PG_lru", PG_lru); >> >> WRITE_NUMBER("PG_private", PG_private); >> >> + WRITE_NUMBER("PG_head_mask", PG_head_mask); >> >> WRITE_NUMBER("PG_swapcache", PG_swapcache); >> >> WRITE_NUMBER("PG_buddy", PG_buddy); >> >> WRITE_NUMBER("PG_slab", PG_slab); >> >> @@ -2040,6 +2055,7 @@ read_vmcoreinfo(void) >> >> READ_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); >> >> READ_SYMBOL("cpu_pgd", cpu_pgd); >> >> READ_SYMBOL("demote_segment_4k", demote_segment_4k); >> >> + READ_SYMBOL("free_huge_page", free_huge_page); >> >> >> >> READ_STRUCTURE_SIZE("page", page); >> >> READ_STRUCTURE_SIZE("mem_section", mem_section); >> >> @@ -2116,6 +2132,7 @@ read_vmcoreinfo(void) >> >> >> >> READ_NUMBER("PG_lru", PG_lru); >> >> READ_NUMBER("PG_private", PG_private); >> >> + READ_NUMBER("PG_head_mask", PG_head_mask); >> >> READ_NUMBER("PG_swapcache", PG_swapcache); >> >> READ_NUMBER("PG_slab", PG_slab); >> >> READ_NUMBER("PG_buddy", PG_buddy); >> >> @@ -4643,13 
+4660,16 @@ __exclude_unnecessary_pages(unsigned long mem_map, >> >> mdf_pfn_t pfn_start, mdf_pfn_t pfn_end, struct cycle *cycle) >> >> { >> >> mdf_pfn_t pfn; >> >> + mdf_pfn_t *pfn_counter; >> >> + mdf_pfn_t nr_pages; >> >> unsigned long index_pg, pfn_mm; >> >> unsigned long long maddr; >> >> mdf_pfn_t pfn_read_start, pfn_read_end; >> >> unsigned char page_cache[SIZE(page) * PGMM_CACHED]; >> >> unsigned char *pcache; >> >> - unsigned int _count, _mapcount = 0; >> >> + unsigned int _count, _mapcount = 0, compound_order = 0; >> >> unsigned long flags, mapping, private = 0; >> >> + unsigned long compound_dtor; >> >> >> >> /* >> >> * If a multi-page exclusion is pending, do it first >> >> @@ -4715,11 +4735,36 @@ __exclude_unnecessary_pages(unsigned long mem_map, >> >> flags = ULONG(pcache + OFFSET(page.flags)); >> >> _count = UINT(pcache + OFFSET(page._count)); >> >> mapping = ULONG(pcache + OFFSET(page.mapping)); >> >> + >> >> + if ((index_pg < PGMM_CACHED - 1) && >> >> + isCompoundHead(flags)) { >> >> + compound_order = ULONG(pcache + SIZE(page) + OFFSET(page.lru) >> >> + + OFFSET(list_head.prev)); >> >> + compound_dtor = ULONG(pcache + SIZE(page) + OFFSET(page.lru) >> >> + + OFFSET(list_head.next)); >> >> + >> >> + if ((compound_order >= sizeof(unsigned long) * 8) >> >> + || ((pfn & ((1UL << compound_order) - 1)) != 0)) { >> >> + /* Invalid order */ >> >> + compound_order = 0; >> >> + } >> >> + } else { >> >> + /* >> >> + * The last pfn of the mem_map cache must not be compound page >> >> + * since all compound pages are aligned to its page order and >> >> + * PGMM_CACHED is a power of 2. 
>> >> + */ >> >> + compound_order = 0; >> >> + compound_dtor = 0; >> >> + } >> >> + >> >> if (OFFSET(page._mapcount) != NOT_FOUND_STRUCTURE) >> >> _mapcount = UINT(pcache + OFFSET(page._mapcount)); >> >> if (OFFSET(page.private) != NOT_FOUND_STRUCTURE) >> >> private = ULONG(pcache + OFFSET(page.private)); >> >> >> >> + nr_pages = 1 << compound_order; >> >> + pfn_counter = NULL; >> >> /* >> >> * Exclude the free page managed by a buddy >> >> * Use buddy identification of free pages whether cyclic or not. >> >> @@ -4727,12 +4772,8 @@ __exclude_unnecessary_pages(unsigned long mem_map, >> >> if ((info->dump_level & DL_EXCLUDE_FREE) >> >> && info->page_is_buddy >> >> && info->page_is_buddy(flags, _mapcount, private, _count)) { >> >> - int nr_pages = 1 << private; >> >> - >> >> - exclude_range(&pfn_free, pfn, pfn + nr_pages, cycle); >> >> - >> >> - pfn += nr_pages - 1; >> >> - mem_map += (nr_pages - 1) * SIZE(page); >> >> + nr_pages = 1 << private; >> >> + pfn_counter = &pfn_free; >> >> } >> >> /* >> >> * Exclude the cache page without the private page. >> >> @@ -4740,8 +4781,7 @@ __exclude_unnecessary_pages(unsigned long mem_map, >> >> else if ((info->dump_level & DL_EXCLUDE_CACHE) >> >> && (isLRU(flags) || isSwapCache(flags)) >> >> && !isPrivate(flags) && !isAnon(mapping)) { >> >> - if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) >> >> - pfn_cache++; >> >> + pfn_counter = &pfn_cache; >> >> } >> >> /* >> >> * Exclude the cache page with the private page. >> >> @@ -4749,23 +4789,39 @@ __exclude_unnecessary_pages(unsigned long mem_map, >> >> else if ((info->dump_level & DL_EXCLUDE_CACHE_PRI) >> >> && (isLRU(flags) || isSwapCache(flags)) >> >> && !isAnon(mapping)) { >> >> - if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) >> >> - pfn_cache_private++; >> >> + pfn_counter = &pfn_cache_private; >> >> } >> >> /* >> >> * Exclude the data page of the user process. 
>> >> + * - anonymous pages >> >> + * - hugetlbfs pages >> >> */ >> >> else if ((info->dump_level & DL_EXCLUDE_USER_DATA) >> >> - && isAnon(mapping)) { >> >> - if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) >> >> - pfn_user++; >> >> + && (isAnon(mapping) || isHugetlb(compound_dtor))) { >> >> + pfn_counter = &pfn_user; >> >> } >> >> /* >> >> * Exclude the hwpoison page. >> >> */ >> >> else if (isHWPOISON(flags)) { >> >> + pfn_counter = &pfn_hwpoison; >> >> + } >> >> + /* >> >> + * Unexcludable page >> >> + */ >> >> + else >> >> + continue; >> >> + >> >> + /* >> >> + * Execute exclusion >> >> + */ >> >> + if (nr_pages == 1) { >> >> if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) >> >> - pfn_hwpoison++; >> >> + (*pfn_counter)++; >> >> + } else { >> >> + exclude_range(pfn_counter, pfn, pfn + nr_pages, cycle); >> >> + pfn += nr_pages - 1; >> >> + mem_map += (nr_pages - 1) * SIZE(page); >> >> } >> >> } >> >> return TRUE; >> >> diff --git a/makedumpfile.h b/makedumpfile.h >> >> index eba9798..9f90b53 100644 >> >> --- a/makedumpfile.h >> >> +++ b/makedumpfile.h >> >> @@ -74,6 +74,7 @@ int get_mem_type(void); >> >> #define PG_lru_ORIGINAL (5) >> >> #define PG_slab_ORIGINAL (7) >> >> #define PG_private_ORIGINAL (11) /* Has something at ->private */ >> >> +#define PG_compound_ORIGINAL (14) /* Is part of a compound page */ >> >> #define PG_swapcache_ORIGINAL (15) /* Swap page: swp_entry_t in private */ >> >> >> >> #define PAGE_BUDDY_MAPCOUNT_VALUE_v2_6_38 (-2) >> >> @@ -148,6 +149,9 @@ test_bit(int nr, unsigned long addr) >> >> >> >> #define isLRU(flags) test_bit(NUMBER(PG_lru), flags) >> >> #define isPrivate(flags) test_bit(NUMBER(PG_private), flags) >> >> +#define isCompoundHead(flags) (!!((flags) & NUMBER(PG_head_mask))) >> >> +#define isHugetlb(dtor) ((SYMBOL(free_huge_page) != NOT_FOUND_SYMBOL) \ >> >> + && (SYMBOL(free_huge_page) == dtor)) >> >> #define isSwapCache(flags) test_bit(NUMBER(PG_swapcache), flags) >> >> #define isHWPOISON(flags) 
(test_bit(NUMBER(PG_hwpoison), flags) \ >> >> && (NUMBER(PG_hwpoison) != NOT_FOUND_NUMBER)) >> >> @@ -1218,6 +1222,7 @@ struct symbol_table { >> >> unsigned long long node_remap_start_vaddr; >> >> unsigned long long node_remap_end_vaddr; >> >> unsigned long long node_remap_start_pfn; >> >> + unsigned long long free_huge_page; >> >> >> >> /* >> >> * for Xen extraction >> >> @@ -1509,6 +1514,8 @@ struct number_table { >> >> */ >> >> long PG_lru; >> >> long PG_private; >> >> + long PG_head; >> >> + long PG_head_mask; >> >> long PG_swapcache; >> >> long PG_buddy; >> >> long PG_slab; >> >> -- >> >> 1.9.0 >> >> >> >> _______________________________________________ >> >> kexec mailing list >> >> kexec at lists.infradead.org >> >> http://lists.infradead.org/mailman/listinfo/kexec