Re: [PATCH v2 06/12] mm/hugetlb: make hugetlb migration target allocation APIs CMA aware

Michal Hocko <mhocko@xxxxxxxxxx> · Tue, 9 Jun 2020 15:53:25 +0200

On Wed 27-05-20 15:44:57, Joonsoo Kim wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
> 
> There is a user who does not want to use CMA memory for migration. Until
> now, this has been implemented on the caller side, but that is not optimal
> since the caller has limited information. This patch implements it on the
> callee side to get a better result.

I do not follow this changelog and honestly do not see an improvement.
skip_cma in the alloc_control sounds like a hack to me. I can now see
why your earlier patch has started to OR the given gfp_mask. If anything,
that change should be folded in here. But even then I do not like a partial
gfp_mask (__GFP_NOWARN on its own really has GFP_NOWAIT-like semantics).

> Acked-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
> ---
>  include/linux/hugetlb.h |  2 --
>  mm/gup.c                |  9 +++------
>  mm/hugetlb.c            | 21 +++++++++++++++++----
>  mm/internal.h           |  1 +
>  4 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index f482563..3d05f7d 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -503,8 +503,6 @@ struct huge_bootmem_page {
>  	struct hstate *hstate;
>  };
>  
> -struct page *alloc_migrate_huge_page(struct hstate *h,
> -				struct alloc_control *ac);
>  struct page *alloc_huge_page_nodemask(struct hstate *h,
>  				struct alloc_control *ac);
>  struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> diff --git a/mm/gup.c b/mm/gup.c
> index 6b78f11..87eca79 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1617,14 +1617,11 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
>  		struct alloc_control ac = {
>  			.nid = nid,
>  			.nmask = NULL,
> -			.gfp_mask = gfp_mask,
> +			.gfp_mask = __GFP_NOWARN,
> +			.skip_cma = true,
>  		};
>  
> -		/*
> -		 * We don't want to dequeue from the pool because pool pages will
> -		 * mostly be from the CMA region.
> -		 */
> -		return alloc_migrate_huge_page(h, &ac);
> +		return alloc_huge_page_nodemask(h, &ac);
>  	}
>  
>  	if (PageTransHuge(page)) {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 8132985..e465582 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1033,13 +1033,19 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
>  	h->free_huge_pages_node[nid]++;
>  }
>  
> -static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
> +static struct page *dequeue_huge_page_node_exact(struct hstate *h,
> +						int nid, bool skip_cma)
>  {
>  	struct page *page;
>  
> -	list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
> +	list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
> +		if (skip_cma && is_migrate_cma_page(page))
> +			continue;
> +
>  		if (!PageHWPoison(page))
>  			break;
> +	}
> +
>  	/*
>  	 * if 'non-isolated free hugepage' not found on the list,
>  	 * the allocation fails.
> @@ -1080,7 +1086,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h,
>  			continue;
>  		node = zone_to_nid(zone);
>  
> -		page = dequeue_huge_page_node_exact(h, node);
> +		page = dequeue_huge_page_node_exact(h, node, ac->skip_cma);
>  		if (page)
>  			return page;
>  	}
> @@ -1937,7 +1943,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
>  	return page;
>  }
>  
> -struct page *alloc_migrate_huge_page(struct hstate *h,
> +static struct page *alloc_migrate_huge_page(struct hstate *h,
>  				struct alloc_control *ac)
>  {
>  	struct page *page;
> @@ -1999,6 +2005,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h,
>  	}
>  	spin_unlock(&hugetlb_lock);
>  
> +	/*
> +	 * clearing __GFP_MOVABLE flag ensure that allocated page
> +	 * will not come from CMA area
> +	 */
> +	if (ac->skip_cma)
> +		ac->gfp_mask &= ~__GFP_MOVABLE;
> +
>  	return alloc_migrate_huge_page(h, ac);
>  }
>  
> diff --git a/mm/internal.h b/mm/internal.h
> index 6e613ce..159cfd6 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -618,6 +618,7 @@ struct alloc_control {
>  	int nid;		/* preferred node id */
>  	nodemask_t *nmask;
>  	gfp_t gfp_mask;
> +	bool skip_cma;
>  };
>  
>  #endif	/* __MM_INTERNAL_H */
> -- 
> 2.7.4
> 

-- 
Michal Hocko
SUSE Labs