Thank you, Mike Rapoport, for reviewing this patch series. Please find my responses inline below.
> #endif /* _LINUX_VMSTAT_H */
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ba6d39b71cb1..ca36751be50e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1758,6 +1758,10 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
> destroy_compound_gigantic_folio(folio, huge_page_order(h));
> free_gigantic_folio(folio, huge_page_order(h));
> } else {
> +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> + __mod_node_page_state(NODE_DATA(page_to_nid(&folio->page)),
> + NR_PAGE_METADATA, -huge_page_order(h));
> I don't think the memory map will change here with classic SPARSEMEM
Thank you, I agree: with classic SPARSEMEM the memory map does not change when a hugetlb folio is freed this way, so I will drop this accounting in the next version.
> +#endif
> __free_pages(&folio->page, huge_page_order(h));
> }
> }
> @@ -2143,7 +2147,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
> __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
> return NULL;
> }
> -
> +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, huge_page_order(h));
> +#endif
> __count_vm_event(HTLB_BUDDY_PGALLOC);
> return page_folio(page);
> }
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 4b9734777f69..7f920bfa8e79 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -214,6 +214,8 @@ static inline void free_vmemmap_page(struct page *page)
> free_bootmem_page(page);
> else
> __free_page(page);
> + __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> + NR_PAGE_METADATA, -1);
> }
>
> /* Free a list of the vmemmap pages */
> @@ -336,6 +338,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
> (void *)walk.reuse_addr);
> list_add(&walk.reuse_page->lru, &vmemmap_pages);
> }
> + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, 1);
>
> /*
> * In order to make remapping routine most efficient for the huge pages,
> @@ -387,8 +390,12 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
>
> while (nr_pages--) {
> page = alloc_pages_node(nid, gfp_mask, 0);
> - if (!page)
> + if (!page) {
> goto out;
> + } else {
> + __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> + NR_PAGE_METADATA, 1);
> We can update this once for nr_pages outside the loop, can't we?
Thank you for the comment. I agree and will update the counter once for all the pages, outside the loop, in the next version.
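Something along these lines in v2 (untested sketch): since the out: path frees every page already on the list, no accounting is needed on failure, and the counter can be bumped once after the loop succeeds:

	while (nr_pages--) {
		page = alloc_pages_node(nid, gfp_mask, 0);
		if (!page)
			goto out;
		list_add_tail(&page->lru, list);
	}
	/* All allocations succeeded; account for them in a single update. */
	__mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
			      (end - start) >> PAGE_SHIFT);

	return 0;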
> + }
> list_add_tail(&page->lru, list);
> }
>
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 50f2f34745af..e02dce7e2e9a 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -26,6 +26,7 @@
> #include <linux/pgtable.h>
> #include <linux/swap.h>
> #include <linux/cma.h>
> +#include <linux/vmstat.h>
> #include "internal.h"
> #include "slab.h"
> #include "shuffle.h"
> @@ -1656,6 +1657,8 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
> panic("Failed to allocate %ld bytes for node %d memory map\n",
> size, pgdat->node_id);
> pgdat->node_mem_map = map + offset;
> + mod_node_early_perpage_metadata(pgdat->node_id,
> + PAGE_ALIGN(size) >> PAGE_SHIFT);
> }
> pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
> __func__, pgdat->node_id, (unsigned long)pgdat,
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0c5be12f9336..4e295d5087f4 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5443,6 +5443,7 @@ void __init setup_per_cpu_pageset(void)
> for_each_online_pgdat(pgdat)
> pgdat->per_cpu_nodestats =
> alloc_percpu(struct per_cpu_nodestat);
> + writeout_early_perpage_metadata();
> Why is it called here?
> You can copy the early stats to the actual node stats as soon as the nodes and
> page allocator are initialized.
Thank you for mentioning this. I agree and will move the call to right after the nodes and the page allocator are initialized.
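For reference, the helper only folds the boot-time per-node counts into the regular node stats, so any point after node initialization works. A minimal sketch of what I have in mm/vmstat.c, assuming the helper keeps its current name and shape:

void __init writeout_early_perpage_metadata(void)
{
	int nid;

	/* Flush the boot-time counts into the live per-node counters. */
	for_each_online_node(nid)
		__mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
				      early_perpage_metadata[nid]);
}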
> }
>
> __meminit void zone_pcp_init(struct zone *zone)
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index 4548fcc66d74..b5b9d3079e20 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -201,6 +201,8 @@ static int __init alloc_node_page_ext(int nid)
> return -ENOMEM;
> NODE_DATA(nid)->node_page_ext = base;
> total_usage += table_size;
> + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> + PAGE_ALIGN(table_size) >> PAGE_SHIFT);
> return 0;
> }
>
> @@ -255,12 +257,15 @@ static void *__meminit alloc_page_ext(size_t size, int nid)
> void *addr = NULL;
>
> addr = alloc_pages_exact_nid(nid, size, flags);
> - if (addr) {
> + if (addr)
> kmemleak_alloc(addr, size, 1, flags);
> - return addr;
> - }
> + else
> + addr = vzalloc_node(size, nid);
>
> - addr = vzalloc_node(size, nid);
> + if (addr) {
> + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> + PAGE_ALIGN(size) >> PAGE_SHIFT);
> + }
>
> return addr;
> }
> @@ -314,6 +319,10 @@ static void free_page_ext(void *addr)
> BUG_ON(PageReserved(page));
> kmemleak_free(addr);
> free_pages_exact(addr, table_size);
> +
> + __mod_node_page_state(NODE_DATA(page_to_nid(page)), NR_PAGE_METADATA,
> + (long)-1 * (PAGE_ALIGN(table_size) >> PAGE_SHIFT));
> +
> What happens with vmalloc()ed page_ext?
Thank you for pointing this out. I will decrement the counter for vmalloc()ed page_ext as well in the next version.
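I.e., moving the decrement so it covers both branches of free_page_ext(), using vmalloc_to_page() to find the node in the vmalloc()ed case (untested sketch):

static void free_page_ext(void *addr)
{
	size_t table_size = page_ext_size * PAGES_PER_SECTION;

	if (is_vmalloc_addr(addr)) {
		/* Uncharge the node the vmalloc()ed table lives on. */
		__mod_node_page_state(NODE_DATA(page_to_nid(vmalloc_to_page(addr))),
				      NR_PAGE_METADATA,
				      -1L * (PAGE_ALIGN(table_size) >> PAGE_SHIFT));
		vfree(addr);
	} else {
		struct page *page = virt_to_page(addr);

		BUG_ON(PageReserved(page));
		kmemleak_free(addr);
		__mod_node_page_state(NODE_DATA(page_to_nid(page)),
				      NR_PAGE_METADATA,
				      -1L * (PAGE_ALIGN(table_size) >> PAGE_SHIFT));
		free_pages_exact(addr, table_size);
	}
}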
> }
> }
>
> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> index a2cbe44c48e1..e33f302db7c6 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -469,5 +469,8 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
> if (r < 0)
> return NULL;
>
> + __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> + PAGE_ALIGN(end - start) >> PAGE_SHIFT);
> +
> return pfn_to_page(pfn);
> }
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 77d91e565045..db78233a85ef 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -14,7 +14,7 @@
> #include <linux/swap.h>
> #include <linux/swapops.h>
> #include <linux/bootmem_info.h>
> -
> +#include <linux/vmstat.h>
> #include "internal.h"
> #include <asm/dma.h>
>
> @@ -465,6 +465,9 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
> */
> sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
> sparsemap_buf_end = sparsemap_buf + size;
> +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> + mod_node_early_perpage_metadata(nid, PAGE_ALIGN(size) >> PAGE_SHIFT);
> All early struct pages are allocated in memmap_alloc(). It'd make sense to update
> the counter there.
Thanks for the comment. We kept the update out of memmap_alloc() because the number of pages backing struct pages can decrease as well, so the accounting has to live at the call sites.
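For instance, when the sparsemap buffer is only partially consumed, sparse_buffer_fini() returns the unused tail to memblock, which would need a matching decrement (untested sketch):

static void __init sparse_buffer_fini(void)
{
	unsigned long size = sparsemap_buf_end - sparsemap_buf;

	if (sparsemap_buf && size > 0) {
#ifndef CONFIG_SPARSEMEM_VMEMMAP
		/* The unused tail of the buffer is freed; uncharge it. */
		mod_node_early_perpage_metadata(
				page_to_nid(virt_to_page(sparsemap_buf)),
				-1L * (PAGE_ALIGN(size) >> PAGE_SHIFT));
#endif
		sparse_buffer_free(size);
	}
	sparsemap_buf = NULL;
}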
> +#endif
> }
>
> static void __init sparse_buffer_fini(void)
> @@ -641,6 +644,8 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
> unsigned long start = (unsigned long) pfn_to_page(pfn);
> unsigned long end = start + nr_pages * sizeof(struct page);
>
> + __mod_node_page_state(NODE_DATA(page_to_nid(pfn_to_page(pfn))), NR_PAGE_METADATA,
> + (long)-1 * (PAGE_ALIGN(end - start) >> PAGE_SHIFT));
> vmemmap_free(start, end, altmap);
> }
> static void free_map_bootmem(struct page *memmap)
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 00e81e99c6ee..731eb5264b49 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1245,6 +1245,7 @@ const char * const vmstat_text[] = {
> "pgpromote_success",
> "pgpromote_candidate",
> #endif
> + "nr_page_metadata",
>
> /* enum writeback_stat_item counters */
> "nr_dirty_threshold",
> @@ -2274,4 +2275,24 @@ static int __init extfrag_debug_init(void)
> }
>
> module_init(extfrag_debug_init);
> +
> +// Page metadata size (struct page and page_ext) in pages
> +unsigned long early_perpage_metadata[MAX_NUMNODES] __initdata;
> static?
Thanks for pointing this out. I will make early_perpage_metadata static in the next version of the patch.
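I.e., something like this in mm/vmstat.c (sketch; mod_node_early_perpage_metadata() is the accessor the other hunks above call):

/* Page metadata size (struct page and page_ext) in pages, accumulated
 * per node before the node stats are live. */
static unsigned long early_perpage_metadata[MAX_NUMNODES] __initdata;

void __init mod_node_early_perpage_metadata(int nid, long delta)
{
	early_perpage_metadata[nid] += delta;
}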