Re: [PATCH v1 1/1] mm: report per-page metadata information

Thank you, Mike Rapoport, for reviewing this patch series. Please find my responses inline below.

>  #endif /* _LINUX_VMSTAT_H */
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ba6d39b71cb1..ca36751be50e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1758,6 +1758,10 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
>               destroy_compound_gigantic_folio(folio, huge_page_order(h));
>               free_gigantic_folio(folio, huge_page_order(h));
>       } else {
> +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> +             __mod_node_page_state(NODE_DATA(page_to_nid(&folio->page)),
> +                                   NR_PAGE_METADATA, -huge_page_order(h));

I don't think the memory map will change here with classic SPARSEMEM

Thank you. Yes, I agree with your comment; the memory map does not change here with classic SPARSEMEM, so this update is not needed.
 

> +#endif
>               __free_pages(&folio->page, huge_page_order(h));
>       }
>  }
> @@ -2143,7 +2147,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
>               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
>               return NULL;
>       }
> -
> +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, huge_page_order(h));
> +#endif
>       __count_vm_event(HTLB_BUDDY_PGALLOC);
>       return page_folio(page);
>  }
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 4b9734777f69..7f920bfa8e79 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -214,6 +214,8 @@ static inline void free_vmemmap_page(struct page *page)
>               free_bootmem_page(page);
>       else
>               __free_page(page);
> +     __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> +                           NR_PAGE_METADATA, -1);
>  }

>  /* Free a list of the vmemmap pages */
> @@ -336,6 +338,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
>                         (void *)walk.reuse_addr);
>               list_add(&walk.reuse_page->lru, &vmemmap_pages);
>       }
> +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, 1);

>       /*
>        * In order to make remapping routine most efficient for the huge pages,
> @@ -387,8 +390,12 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,

>       while (nr_pages--) {
>               page = alloc_pages_node(nid, gfp_mask, 0);
> -             if (!page)
> +             if (!page) {
>                       goto out;
> +             } else {
> +                     __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> +                                           NR_PAGE_METADATA, 1);

We can update this once for nr_pages outside the loop, can't we?

Thank you for the comment. I agree and shall update the counter once for all nr_pages outside the loop.
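
For illustration, a rough sketch of what I have in mind (hypothetical; the final patch may differ):

        unsigned long nr_allocated = nr_pages;

        while (nr_pages--) {
                page = alloc_pages_node(nid, gfp_mask, 0);
                if (!page)
                        goto out;
                list_add_tail(&page->lru, list);
        }
        /* One batched update instead of one per allocated page. */
        __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, nr_allocated);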
 

> +             }
>               list_add_tail(&page->lru, list);
>       }

> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 50f2f34745af..e02dce7e2e9a 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -26,6 +26,7 @@
>  #include <linux/pgtable.h>
>  #include <linux/swap.h>
>  #include <linux/cma.h>
> +#include <linux/vmstat.h>
>  #include "internal.h"
>  #include "slab.h"
>  #include "shuffle.h"
> @@ -1656,6 +1657,8 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
>                       panic("Failed to allocate %ld bytes for node %d memory map\n",
>                             size, pgdat->node_id);
>               pgdat->node_mem_map = map + offset;
> +             mod_node_early_perpage_metadata(pgdat->node_id,
> +                                             PAGE_ALIGN(size) >> PAGE_SHIFT);
>       }
>       pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
>                               __func__, pgdat->node_id, (unsigned long)pgdat,
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0c5be12f9336..4e295d5087f4 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5443,6 +5443,7 @@ void __init setup_per_cpu_pageset(void)
>       for_each_online_pgdat(pgdat)
>               pgdat->per_cpu_nodestats =
>                       alloc_percpu(struct per_cpu_nodestat);
> +     writeout_early_perpage_metadata();

Why is it called here?
You can copy early stats to actual node stats as soon as the nodes and page
allocator are initialized.

Thank you for mentioning this. I agree and shall move the call to the point where the nodes and page allocator are initialized.
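
For reference, a minimal sketch of the flush (an assumption on my side, since the helper body is not quoted above):

        void __init writeout_early_perpage_metadata(void)
        {
                int nid;

                /* Fold the boot-time per-node counts into the live node stats. */
                for_each_online_node(nid)
                        __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
                                              early_perpage_metadata[nid]);
        }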
 

>  }

>  __meminit void zone_pcp_init(struct zone *zone)
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index 4548fcc66d74..b5b9d3079e20 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -201,6 +201,8 @@ static int __init alloc_node_page_ext(int nid)
>               return -ENOMEM;
>       NODE_DATA(nid)->node_page_ext = base;
>       total_usage += table_size;
> +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> +                           PAGE_ALIGN(table_size) >> PAGE_SHIFT);
>       return 0;
>  }

> @@ -255,12 +257,15 @@ static void *__meminit alloc_page_ext(size_t size, int nid)
>       void *addr = NULL;

>       addr = alloc_pages_exact_nid(nid, size, flags);
> -     if (addr) {
> +     if (addr)
>               kmemleak_alloc(addr, size, 1, flags);
> -             return addr;
> -     }
> +     else
> +             addr = vzalloc_node(size, nid);

> -     addr = vzalloc_node(size, nid);
> +     if (addr) {
> +             __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> +                                   PAGE_ALIGN(size) >> PAGE_SHIFT);
> +     }

>       return addr;
>  }
> @@ -314,6 +319,10 @@ static void free_page_ext(void *addr)
>               BUG_ON(PageReserved(page));
>               kmemleak_free(addr);
>               free_pages_exact(addr, table_size);
> +
> +             __mod_node_page_state(NODE_DATA(page_to_nid(page)), NR_PAGE_METADATA,
> +                                   (long)-1 * (PAGE_ALIGN(table_size) >> PAGE_SHIFT));
> +

what happens with vmalloc()ed page_ext?

Thank you for pointing this out. I shall make this change for the vmalloc()ed page_ext path as well.
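
A rough sketch of the additional hunk (hypothetical; table_size as already computed in free_page_ext()):

        if (is_vmalloc_addr(addr)) {
                /* Account the vmalloc()ed table before freeing it. */
                __mod_node_page_state(NODE_DATA(page_to_nid(vmalloc_to_page(addr))),
                                      NR_PAGE_METADATA,
                                      -1L * (PAGE_ALIGN(table_size) >> PAGE_SHIFT));
                vfree(addr);
        }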
 

>       }
>  }

> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> index a2cbe44c48e1..e33f302db7c6 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -469,5 +469,8 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
>       if (r < 0)
>               return NULL;

> +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> +                           PAGE_ALIGN(end - start) >> PAGE_SHIFT);
> +
>       return pfn_to_page(pfn);
>  }
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 77d91e565045..db78233a85ef 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -14,7 +14,7 @@
>  #include <linux/swap.h>
>  #include <linux/swapops.h>
>  #include <linux/bootmem_info.h>
> -
> +#include <linux/vmstat.h>
>  #include "internal.h"
>  #include <asm/dma.h>

> @@ -465,6 +465,9 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
>        */
>       sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
>       sparsemap_buf_end = sparsemap_buf + size;
> +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> +     mod_node_early_perpage_metadata(nid, PAGE_ALIGN(size) >> PAGE_SHIFT);

All early struct pages are allocated in memmap_alloc(). It'd make sense to update
the counter there.

Thanks for the comment. We did not update the counter in memmap_alloc() because the number of struct pages can decrease as well as increase.
 

> +#endif
>  }

>  static void __init sparse_buffer_fini(void)
> @@ -641,6 +644,8 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
>       unsigned long start = (unsigned long) pfn_to_page(pfn);
>       unsigned long end = start + nr_pages * sizeof(struct page);

> +     __mod_node_page_state(NODE_DATA(page_to_nid(pfn_to_page(pfn))), NR_PAGE_METADATA,
> +                           (long)-1 * (PAGE_ALIGN(end - start) >> PAGE_SHIFT));
>       vmemmap_free(start, end, altmap);
>  }
>  static void free_map_bootmem(struct page *memmap)
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 00e81e99c6ee..731eb5264b49 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1245,6 +1245,7 @@ const char * const vmstat_text[] = {
>       "pgpromote_success",
>       "pgpromote_candidate",
>  #endif
> +     "nr_page_metadata",

>       /* enum writeback_stat_item counters */
>       "nr_dirty_threshold",
> @@ -2274,4 +2275,24 @@ static int __init extfrag_debug_init(void)
>  }

>  module_init(extfrag_debug_init);
> +
> +// Page metadata size (struct page and page_ext) in pages
> +unsigned long early_perpage_metadata[MAX_NUMNODES] __initdata;

static?

Thanks for pointing this out. I shall make the __initdata array static in the next version of the patch.
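
That is, the declaration would simply become:

        static unsigned long early_perpage_metadata[MAX_NUMNODES] __initdata;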
