Re: [PATCH] mm/hugetlb: Concentrated releases memory when cmdline specifies node requests for large pages

Muchun Song <muchun.song@xxxxxxxxx> · Sat, 12 Oct 2024 11:22:20 +0800

> On Oct 11, 2024, at 16:05, suhua <suhua.tanke@xxxxxxxxx> wrote:
> 
> When HVO (HugeTLB Vmemmap Optimization) is enabled and huge pages are
> allocated, the freed memory can be aggregated into higher-order memory on
> the following paths, which facilitates later higher-order allocations.
> 
> echo 200000 > /proc/sys/vm/nr_hugepages
> echo 200000 > /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
> grub: default_hugepagesz=2M hugepagesz=2M hugepages=200000
> 
> Currently, aggregating the freed memory into higher-order pages is not
> supported for the following configuration, so the memory is released as
> lower-order pages instead.
> 
> grub: default_hugepagesz=2M hugepagesz=2M hugepages=0:100000,1:100000
> 
> This patch makes the memory freed by the huge page optimization aggregate
> into higher-order memory in this case as well.
> 
> eg:
> cat /proc/cmdline
> BOOT_IMAGE=/boot/vmlinuz-xxx ... default_hugepagesz=2M hugepagesz=2M hugepages=0:100000,1:100000
> 
> Before:
> Free pages count per migrate type at order       0      1      2      3      4      5      6      7      8      9     10
> ...
> Node    0, zone   Normal, type    Unmovable  55282  97039  99307      0      1      1      0      1      1      1      0
> Node    0, zone   Normal, type      Movable     25     11    345     87     48     21      2     20      9      3  75061
> Node    0, zone   Normal, type  Reclaimable      4      2      2      4      3      0      2      1      1      1      0
> Node    0, zone   Normal, type   HighAtomic      0      0      0      0      0      0      0      0      0      0      0
> ...
> Free pages count per migrate type at order       0      1      2      3      4      5      6      7      8      9     10
> Node    1, zone   Normal, type    Unmovable  98888  99650  99679      2      3      1      2      2      2      0      0
> Node    1, zone   Normal, type      Movable      1      1      0      1      1      0      1      0      1      1  75937
> Node    1, zone   Normal, type  Reclaimable      0      0      0      0      0      0      0      0      0      0      0
> Node    1, zone   Normal, type   HighAtomic      0      0      0      0      0      0      0      0      0      0      0
> 
> After:
> Free pages count per migrate type at order       0      1      2      3      4      5      6      7      8      9     10
> ...
> Node    0, zone   Normal, type    Unmovable    152    158     37      2      2      0      3      4      2      6    717
> Node    0, zone   Normal, type      Movable      1     37     53      3     55     49     16      6      2      1  75000
> Node    0, zone   Normal, type  Reclaimable      1      4      3      1      2      1      1      1      1      1      0
> Node    0, zone   Normal, type   HighAtomic      0      0      0      0      0      0      0      0      0      0      0
> ...
> Free pages count per migrate type at order       0      1      2      3      4      5      6      7      8      9     10
> Node    1, zone   Normal, type    Unmovable      5      3      2      1      3      4      2      2      2      0    779
> Node    1, zone   Normal, type      Movable      1      0      1      1      1      0      1      0      1      1  75849
> Node    1, zone   Normal, type  Reclaimable      0      0      0      0      0      0      0      0      0      0      0
> Node    1, zone   Normal, type   HighAtomic      0      0      0      0      0      0      0      0      0      0      0

The result looks good. However, the subject could be changed to:

	"mm/hugetlb: perform vmemmap optimization in batches for specific node allocation"

> 
> Signed-off-by: suhua <suhua1@xxxxxxxxxxxx>
> ---
> mm/hugetlb.c | 37 +++++++++++++++++++++++++++++++++----
> 1 file changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 190fa05635f4..3441d380c90b 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2077,6 +2077,24 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
> 	return folio;
> }
> 
> +static struct folio *only_alloc_and_account_fresh_hugetlb_folio(
> + 		struct hstate *h, gfp_t gfp_mask,
> + 		int nid, nodemask_t *nmask)
> +{
> + 	struct folio *folio;
> +
> + 	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
> + 	if (!folio)
> + 		return NULL;
> +
> + 	spin_lock_irq(&hugetlb_lock);
> + 	h->nr_huge_pages++;
> + 	h->nr_huge_pages_node[nid]++;
> + 	spin_unlock_irq(&hugetlb_lock);
> +
> + 	return folio;
> +}
> +
> /*
>  * Common helper to allocate a fresh hugetlb page. All specific allocators
>  * should use this function to get new hugetlb pages
> @@ -3301,23 +3319,34 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
> {
> 	unsigned long i;
> 	char buf[32];
> + 	LIST_HEAD(folio_list);
> + 	struct folio *folio, *tmp_f;
> 
> 	for (i = 0; i < h->max_huge_pages_node[nid]; ++i) {
> 		if (hstate_is_gigantic(h)) {
> 			if (!alloc_bootmem_huge_page(h, nid))
> 				break;
> 		} else {
> - 			struct folio *folio;
> 			gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
> 
> - 			folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
> - 					&node_states[N_MEMORY]);
> + 			folio = only_alloc_and_account_fresh_hugetlb_folio(h,
> + 					gfp_mask, nid, &node_states[N_MEMORY]);

I think we could use only_alloc_fresh_hugetlb_folio() plus prep_and_add_allocated_folios()
to achieve the same goal in a simpler way, right?

> 			if (!folio)
> 				break;
> - 			free_huge_folio(folio); /* free it into the hugepage allocator */
> + 			list_add(&folio->lru, &folio_list);
> 		}
> 		cond_resched();
> 	}
> +
> + 	if (!list_empty(&folio_list)) {
> + 		/* Send list for bulk vmemmap optimization processing */
> + 		hugetlb_vmemmap_optimize_folios(h, &folio_list);
> +
> + 		list_for_each_entry_safe(folio, tmp_f, &folio_list, lru) {
> + 			free_huge_folio(folio); /* free it into the hugepage allocator */
> + 		}
> + 	}

We could use prep_and_add_allocated_folios here.

Thanks.

> +
> 	if (i == h->max_huge_pages_node[nid])
> 		return;
> 
> -- 
> 2.34.1
>