Hello Kumagai-san, thank you for this patch. I was on vacation last week (and will be on vacation the next two weeks again). Anyway, see my comments below. On Wed, 23 Jul 2014 00:32:20 +0000 Atsushi Kumagai <kumagai-atsushi at mxc.nes.nec.co.jp> wrote: >[...] > > Signed-off-by: Atsushi Kumagai <kumagai-atsushi at mxc.nes.nec.co.jp> > --- > makedumpfile.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++-------- > makedumpfile.h | 7 ++++ > 2 files changed, 105 insertions(+), 15 deletions(-) > > diff --git a/makedumpfile.c b/makedumpfile.c > index 3884aa5..7bee413 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -1180,6 +1180,7 @@ get_symbol_info(void) > SYMBOL_INIT(vmemmap_list, "vmemmap_list"); > SYMBOL_INIT(mmu_psize_defs, "mmu_psize_defs"); > SYMBOL_INIT(mmu_vmemmap_psize, "mmu_vmemmap_psize"); > + SYMBOL_INIT(free_huge_page, "free_huge_page"); > > return TRUE; > } > @@ -1293,6 +1294,15 @@ get_structure_info(void) > ENUM_NUMBER_INIT(PG_slab, "PG_slab"); > ENUM_NUMBER_INIT(PG_hwpoison, "PG_hwpoison"); > > + ENUM_NUMBER_INIT(PG_head_mask, "PG_head_mask"); > + if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER) { > + ENUM_NUMBER_INIT(PG_head, "PG_head"); > + if (NUMBER(PG_head) == NOT_FOUND_NUMBER) > + ENUM_NUMBER_INIT(PG_head, "PG_compound"); > + if (NUMBER(PG_head) != NOT_FOUND_NUMBER) > + NUMBER(PG_head_mask) = 1UL << NUMBER(PG_head); > + } > + > ENUM_TYPE_SIZE_INIT(pageflags, "pageflags"); > > TYPEDEF_SIZE_INIT(nodemask_t, "nodemask_t"); > @@ -1527,6 +1537,9 @@ get_value_for_old_linux(void) > NUMBER(PG_swapcache) = PG_swapcache_ORIGINAL; > if (NUMBER(PG_slab) == NOT_FOUND_NUMBER) > NUMBER(PG_slab) = PG_slab_ORIGINAL; > + if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER) > + NUMBER(PG_head_mask) = 1L << PG_compound_ORIGINAL; > + > /* > * The values from here are for free page filtering based on > * mem_map array. These are minimum effort to cover old > @@ -1694,6 +1707,7 @@ write_vmcoreinfo_data(void) > WRITE_SYMBOL("vmemmap_list", vmemmap_list); > WRITE_SYMBOL("mmu_psize_defs", mmu_psize_defs); > WRITE_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); > + WRITE_SYMBOL("free_huge_page", free_huge_page); > > /* > * write the structure size of 1st kernel > @@ -1783,6 +1797,7 @@ write_vmcoreinfo_data(void) > > WRITE_NUMBER("PG_lru", PG_lru); > WRITE_NUMBER("PG_private", PG_private); > + WRITE_NUMBER("PG_head_mask", PG_head_mask); > WRITE_NUMBER("PG_swapcache", PG_swapcache); > WRITE_NUMBER("PG_buddy", PG_buddy); > WRITE_NUMBER("PG_slab", PG_slab); > @@ -2033,6 +2048,7 @@ read_vmcoreinfo(void) > READ_SYMBOL("vmemmap_list", vmemmap_list); > READ_SYMBOL("mmu_psize_defs", mmu_psize_defs); > READ_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); > + READ_SYMBOL("free_huge_page", free_huge_page); > > READ_STRUCTURE_SIZE("page", page); > READ_STRUCTURE_SIZE("mem_section", mem_section); > @@ -2109,6 +2125,7 @@ read_vmcoreinfo(void) > > READ_NUMBER("PG_lru", PG_lru); > READ_NUMBER("PG_private", PG_private); > + READ_NUMBER("PG_head_mask", PG_head_mask); > READ_NUMBER("PG_swapcache", PG_swapcache); > READ_NUMBER("PG_slab", PG_slab); > READ_NUMBER("PG_buddy", PG_buddy); > @@ -4636,13 +4653,16 @@ __exclude_unnecessary_pages(unsigned long mem_map, > mdf_pfn_t pfn_start, mdf_pfn_t pfn_end, struct cycle *cycle) > { > mdf_pfn_t pfn; > + mdf_pfn_t *pfn_counter; > + mdf_pfn_t nr_pages; > unsigned long index_pg, pfn_mm; > unsigned long long maddr; > mdf_pfn_t pfn_read_start, pfn_read_end; > unsigned char page_cache[SIZE(page) * PGMM_CACHED]; > unsigned char *pcache; > - unsigned int _count, _mapcount = 0; > + unsigned int _count, _mapcount = 0, compound_order = 0; > unsigned long flags, mapping, private = 0; > + unsigned long hugetlb_dtor; > > /* > * If a multi-page exclusion is pending, do it first > @@ -4708,11 +4728,36 @@ __exclude_unnecessary_pages(unsigned long mem_map, > flags = ULONG(pcache + OFFSET(page.flags)); > _count = UINT(pcache + OFFSET(page._count)); > mapping = ULONG(pcache + OFFSET(page.mapping)); > + > + if (index_pg < PGMM_CACHED - 1) { Why not check the compound flags here? Like this: if ((index_pg < PGMM_CACHED - 1) && isCompoundHead(flags)) { > + compound_order = ULONG(pcache + SIZE(page) + OFFSET(page.lru) > + + OFFSET(list_head.prev)); > + hugetlb_dtor = ULONG(pcache + SIZE(page) + OFFSET(page.lru) > + + OFFSET(list_head.next)); > + > + if (compound_order && ^^^^^^^^^^^^^^ BTW this check can be omitted. If compound_order is zero, then it is only assigned zero again, which makes no difference. You can leave the condition here if you find it cleaner, but it is not necessary. > + ((compound_order >= sizeof(unsigned long) * 8) > + || (pfn & ((1UL << compound_order) - 1)) != 0)) { > + /* Invalid order */ > + compound_order = 0; > + } > + } else { > + /* > + * The last pfn of the mem_map cache must not be compound page > + * since all compound pages are aligned to its page order and > + * PGMM_CACHED is 512. ^^^ For clarity I suggest: PGMM_CACHED is a power of 2. > + */ > + compound_order = 0; > + hugetlb_dtor = 0; > + } > + > if (OFFSET(page._mapcount) != NOT_FOUND_STRUCTURE) > _mapcount = UINT(pcache + OFFSET(page._mapcount)); > if (OFFSET(page.private) != NOT_FOUND_STRUCTURE) > private = ULONG(pcache + OFFSET(page.private)); > > + nr_pages = 1; I think you can write: nr_pages = 1 << compound_order; With the added check for hugepage flags (see above) nr_pages then describes how many pages share flags with the current page. > + pfn_counter = NULL; > /* > * Exclude the free page managed by a buddy > * Use buddy identification of free pages whether cyclic or not. > @@ -4720,12 +4765,8 @@ __exclude_unnecessary_pages(unsigned long mem_map, > if ((info->dump_level & DL_EXCLUDE_FREE) > && info->page_is_buddy > && info->page_is_buddy(flags, _mapcount, private, _count)) { > - int nr_pages = 1 << private; > - > - exclude_range(&pfn_free, pfn, pfn + nr_pages, cycle); > - > - pfn += nr_pages - 1; > - mem_map += (nr_pages - 1) * SIZE(page); > + nr_pages = 1 << private; Ok, this assignment is still needed, because free pages should also be treated as one entity, but they are not compound. > + pfn_counter = &pfn_free; > } > /* > * Exclude the cache page without the private page. > @@ -4733,8 +4774,12 @@ __exclude_unnecessary_pages(unsigned long mem_map, > else if ((info->dump_level & DL_EXCLUDE_CACHE) > && (isLRU(flags) || isSwapCache(flags)) > && !isPrivate(flags) && !isAnon(mapping)) { > - if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) > - pfn_cache++; > + /* > + * NOTE: If THP for cache is introduced, the check for > + * compound pages is needed here. > + */ > + nr_pages = 1; But this assignment is not needed, and the code is then ready for THP cache. > + pfn_counter = &pfn_cache; > } > /* > * Exclude the cache page with the private page. > @@ -4742,23 +4787,61 @@ __exclude_unnecessary_pages(unsigned long mem_map, > else if ((info->dump_level & DL_EXCLUDE_CACHE_PRI) > && (isLRU(flags) || isSwapCache(flags)) > && !isAnon(mapping)) { > - if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) > - pfn_cache_private++; > + /* > + * NOTE: If THP for cache is introduced, the check for > + * compound pages is needed here. > + */ > + nr_pages = 1; Same here. If the assignment is removed, the code is THP ready for free. ;-) > + pfn_counter = &pfn_cache_private; > } > /* > * Exclude the data page of the user process. > */ > else if ((info->dump_level & DL_EXCLUDE_USER_DATA) > - && isAnon(mapping)) { > - if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) > - pfn_user++; > + && (isAnon(mapping) || isHugetlb(hugetlb_dtor))) { > + /* > + * Exclude the anonymous pages as user pages. > + */ > + if (isAnon(mapping)) { > + /* > + * Check the compound page > + */ > + if (isCompoundHead(flags) && compound_order > 0) { > + nr_pages = 1 << compound_order; > + } else > + nr_pages = 1; > + } > + /* > + * Exclude the hugetlbfs pages as user pages. > + */ > + else { > + nr_pages = 1 << compound_order; > + } This whole if-else block can go away, AFAICS. > + pfn_counter = &pfn_user; > } > /* > * Exclude the hwpoison page. > */ > else if (isHWPOISON(flags)) { > + nr_pages = 1; Same here. The assignment can be removed. Thanks Petr Tesarik > + pfn_counter = &pfn_hwpoison; > + } > + /* > + * Unexcludable page > + */ > + else > + continue; > + > + /* > + * Execute exclusion > + */ > + if (nr_pages == 1) { > if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) > - pfn_hwpoison++; > + (*pfn_counter)++; > + } else { > + exclude_range(pfn_counter, pfn, pfn + nr_pages, cycle); > + pfn += nr_pages - 1; > + mem_map += (nr_pages - 1) * SIZE(page); > } > } > return TRUE; > diff --git a/makedumpfile.h b/makedumpfile.h > index 9402f05..a2c76d8 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -74,6 +74,7 @@ int get_mem_type(void); > #define PG_lru_ORIGINAL (5) > #define PG_slab_ORIGINAL (7) > #define PG_private_ORIGINAL (11) /* Has something at ->private */ > +#define PG_compound_ORIGINAL (14) /* Is part of a compound page */ > #define PG_swapcache_ORIGINAL (15) /* Swap page: swp_entry_t in private */ > > #define PAGE_BUDDY_MAPCOUNT_VALUE_v2_6_38 (-2) > @@ -148,6 +149,9 @@ test_bit(int nr, unsigned long addr) > > #define isLRU(flags) test_bit(NUMBER(PG_lru), flags) > #define isPrivate(flags) test_bit(NUMBER(PG_private), flags) > +#define isCompoundHead(flags) (!!((flags) & NUMBER(PG_head_mask))) > +#define isHugetlb(dtor) ((SYMBOL(free_huge_page) != NOT_FOUND_SYMBOL) \ > + && (SYMBOL(free_huge_page) == dtor)) > #define isSwapCache(flags) test_bit(NUMBER(PG_swapcache), flags) > #define isHWPOISON(flags) (test_bit(NUMBER(PG_hwpoison), flags) \ > && (NUMBER(PG_hwpoison) != NOT_FOUND_NUMBER)) > @@ -1143,6 +1147,7 @@ struct symbol_table { > unsigned long long node_remap_start_vaddr; > unsigned long long node_remap_end_vaddr; > unsigned long long node_remap_start_pfn; > + unsigned long long free_huge_page; > > /* > * for Xen extraction > @@ -1428,6 +1433,8 @@ struct number_table { > */ > long PG_lru; > long PG_private; > + long PG_head; > + long PG_head_mask; > long PG_swapcache; > long PG_buddy; > long PG_slab;