Re: [PATCH 02/11] mm: Pass order to __alloc_pages_nodemask in GFP flags

On Mon, May 06, 2019 at 09:06:00PM -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
> 
> Save marshalling an extra argument in all the callers at the expense of
> using five bits of the GFP flags.  We still have three GFP bits remaining
> after doing this (and we can release one more by reallocating NORETRY,
> RETRY_MAYFAIL and NOFAIL).
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  arch/x86/events/intel/ds.c |  4 +--
>  arch/x86/kvm/vmx/vmx.c     |  4 +--
>  include/linux/gfp.h        | 51 ++++++++++++++++++++++----------------
>  include/linux/migrate.h    |  2 +-
>  mm/filemap.c               |  2 +-
>  mm/gup.c                   |  4 +--
>  mm/hugetlb.c               |  5 ++--
>  mm/khugepaged.c            |  2 +-
>  mm/mempolicy.c             | 30 +++++++++++-----------
>  mm/migrate.c               |  2 +-
>  mm/page_alloc.c            |  4 +--
>  mm/shmem.c                 |  5 ++--
>  mm/slub.c                  |  2 +-
>  13 files changed, 63 insertions(+), 54 deletions(-)
> 
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 10c99ce1fead..82fee9845b87 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -315,13 +315,13 @@ static void ds_clear_cea(void *cea, size_t size)
>  	preempt_enable();
>  }
>  
> -static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
> +static void *dsalloc_pages(size_t size, gfp_t gfp, int cpu)
>  {
>  	unsigned int order = get_order(size);
>  	int node = cpu_to_node(cpu);
>  	struct page *page;
>  
> -	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
> +	page = __alloc_pages_node(node, gfp | __GFP_ZERO | __GFP_ORDER(order));

Order was derived from size in this function.  Is this truly equivalent to the
old call?

At a minimum, if I am wrong, the get_order() call above should be removed, no?
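
For reference, this is the round trip as I read it -- a stand-alone sketch,
where PAGE_SHIFT, GFP_BITS_SHIFT, get_order() and the gfp value below are
simplified stand-ins rather than the kernel definitions:

	/*
	 * Stand-alone sketch of the encode/decode in this patch, as I read it.
	 * gfp_t is treated as a plain unsigned int here.
	 */
	#include <assert.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define GFP_BITS_SHIFT	23	/* __GFP_BITS_SHIFT in the patch */
	#define GFP_ORDER(order)  ((unsigned int)(order) << GFP_BITS_SHIFT)
	#define gfp_order(gfp)	  ((unsigned int)(gfp) >> GFP_BITS_SHIFT)

	/* simplified get_order(): smallest order covering 'size' bytes */
	static unsigned int get_order(size_t size)
	{
		unsigned int order = 0;

		size = (size - 1) >> PAGE_SHIFT;
		while (size) {
			order++;
			size >>= 1;
		}
		return order;
	}

	int main(void)
	{
		size_t size = 64 * 1024;		/* e.g. a 64KiB DS buffer */
		unsigned int order = get_order(size);	/* old code: passed as 3rd arg */
		unsigned int gfp = 0x400000u;		/* pretend low bits are GFP flags */

		/* new code: the same order, now folded into the mask ... */
		unsigned int encoded = gfp | GFP_ORDER(order);

		/* ... and recovered inside __alloc_pages_nodemask() */
		assert(gfp_order(encoded) == order);
		assert((encoded & (GFP_ORDER(1) - 1)) == gfp);	/* flags untouched */

		printf("order %u survives the round trip\n", order);
		return 0;
	}

If that reading is right the two forms look equivalent for any order that fits
above __GFP_BITS_SHIFT, but a confirmation would help.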

Ira

>  	return page ? page_address(page) : NULL;
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ab432a930ae8..323a0f6ffe13 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2380,13 +2380,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>  	return 0;
>  }
>  
> -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
> +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t gfp)
>  {
>  	int node = cpu_to_node(cpu);
>  	struct page *pages;
>  	struct vmcs *vmcs;
>  
> -	pages = __alloc_pages_node(node, flags, vmcs_config.order);
> +	pages = __alloc_pages_node(node, gfp | __GFP_ORDER(vmcs_config.order));
>  	if (!pages)
>  		return NULL;
>  	vmcs = page_address(pages);
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index fb07b503dc45..e7845c2510db 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -219,6 +219,18 @@ struct vm_area_struct;
>  /* Room for N __GFP_FOO bits */
>  #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
>  #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
> +#define __GFP_ORDER(order) ((__force gfp_t)(order << __GFP_BITS_SHIFT))
> +#define __GFP_ORDER_PMD	__GFP_ORDER(PMD_SHIFT - PAGE_SHIFT)
> +#define __GFP_ORDER_PUD	__GFP_ORDER(PUD_SHIFT - PAGE_SHIFT)
> +
> +/*
> + * Extract the order from a GFP bitmask.
> + * Must be the top bits to avoid an AND operation.  Don't let
> + * __GFP_BITS_SHIFT get over 27, or we won't be able to encode orders
> + * above 15 (some architectures allow configuring MAX_ORDER up to 64,
> + * but I doubt larger than 31 are ever used).
> + */
> +#define gfp_order(gfp)	(((__force unsigned int)gfp) >> __GFP_BITS_SHIFT)
>  
>  /**
>   * DOC: Useful GFP flag combinations
> @@ -464,26 +476,23 @@ static inline void arch_alloc_page(struct page *page, int order) { }
>  #endif
>  
>  struct page *
> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
> -							nodemask_t *nodemask);
> +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask);
>  
> -static inline struct page *
> -__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
> +static inline struct page *__alloc_pages(gfp_t gfp_mask, int preferred_nid)
>  {
> -	return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
> +	return __alloc_pages_nodemask(gfp_mask, preferred_nid, NULL);
>  }
>  
>  /*
>   * Allocate pages, preferring the node given as nid. The node must be valid and
>   * online. For more general interface, see alloc_pages_node().
>   */
> -static inline struct page *
> -__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
> +static inline struct page *__alloc_pages_node(int nid, gfp_t gfp)
>  {
>  	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
> -	VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
> +	VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
>  
> -	return __alloc_pages(gfp_mask, order, nid);
> +	return __alloc_pages(gfp, nid);
>  }
>  
>  /*
> @@ -497,35 +506,35 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
>  	if (nid == NUMA_NO_NODE)
>  		nid = numa_mem_id();
>  
> -	return __alloc_pages_node(nid, gfp_mask, order);
> +	return __alloc_pages_node(nid, gfp_mask | __GFP_ORDER(order));
>  }
>  
>  #ifdef CONFIG_NUMA
> -extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
> +extern struct page *alloc_pages_current(gfp_t gfp_mask);
>  
>  static inline struct page *
>  alloc_pages(gfp_t gfp_mask, unsigned int order)
>  {
> -	return alloc_pages_current(gfp_mask, order);
> +	return alloc_pages_current(gfp_mask | __GFP_ORDER(order));
>  }
> -extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
> -			struct vm_area_struct *vma, unsigned long addr,
> -			int node, bool hugepage);
> +extern struct page *alloc_pages_vma(gfp_t gfp_mask, struct vm_area_struct *vma,
> +		unsigned long addr, int node, bool hugepage);
>  #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
> -	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
> +	alloc_pages_vma(gfp_mask | __GFP_ORDER(order), vma, addr, \
> +			numa_node_id(), true)
>  #else
>  #define alloc_pages(gfp_mask, order) \
> -		alloc_pages_node(numa_node_id(), gfp_mask, order)
> -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
> -	alloc_pages(gfp_mask, order)
> +	alloc_pages_node(numa_node_id(), gfp_mask, order)
> +#define alloc_pages_vma(gfp_mask, vma, addr, node, false) \
> +	alloc_pages(gfp_mask, 0)
>  #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
>  	alloc_pages(gfp_mask, order)
>  #endif
>  #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
>  #define alloc_page_vma(gfp_mask, vma, addr)			\
> -	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
> +	alloc_pages_vma(gfp_mask, vma, addr, numa_node_id(), false)
>  #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
> -	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
> +	alloc_pages_vma(gfp_mask, vma, addr, node, false)
>  
>  extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
>  extern unsigned long get_zeroed_page(gfp_t gfp_mask);
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index e13d9bf2f9a5..ba4385144cc9 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -50,7 +50,7 @@ static inline struct page *new_page_nodemask(struct page *page,
>  	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
>  		gfp_mask |= __GFP_HIGHMEM;
>  
> -	new_page = __alloc_pages_nodemask(gfp_mask, order,
> +	new_page = __alloc_pages_nodemask(gfp_mask | __GFP_ORDER(order),
>  				preferred_nid, nodemask);
>  
>  	if (new_page && PageTransHuge(new_page))
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 3ad18fa56057..b7b0841312c9 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -945,7 +945,7 @@ struct page *__page_cache_alloc(gfp_t gfp)
>  		do {
>  			cpuset_mems_cookie = read_mems_allowed_begin();
>  			n = cpuset_mem_spread_node();
> -			page = __alloc_pages_node(n, gfp, 0);
> +			page = __alloc_pages_node(n, gfp);
>  		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
>  
>  		return page;
> diff --git a/mm/gup.c b/mm/gup.c
> index 294e87ae5b9a..7b06962a4630 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1306,14 +1306,14 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
>  		 * CMA area again.
>  		 */
>  		thp_gfpmask &= ~__GFP_MOVABLE;
> -		thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
> +		thp = __alloc_pages_node(nid, thp_gfpmask | __GFP_PMD_ORDER);
>  		if (!thp)
>  			return NULL;
>  		prep_transhuge_page(thp);
>  		return thp;
>  	}
>  
> -	return __alloc_pages_node(nid, gfp_mask, 0);
> +	return __alloc_pages_node(nid, gfp_mask);
>  }
>  
>  static long check_and_migrate_cma_pages(struct task_struct *tsk,
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 109f5de82910..f3f0f2902a52 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1401,10 +1401,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
>  	int order = huge_page_order(h);
>  	struct page *page;
>  
> -	gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
> +	gfp_mask |= __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
> +			__GFP_ORDER(order);
>  	if (nid == NUMA_NO_NODE)
>  		nid = numa_mem_id();
> -	page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
> +	page = __alloc_pages_nodemask(gfp_mask, nid, nmask);
>  	if (page)
>  		__count_vm_event(HTLB_BUDDY_PGALLOC);
>  	else
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index a335f7c1fac4..3d9267394881 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -770,7 +770,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
>  {
>  	VM_BUG_ON_PAGE(*hpage, *hpage);
>  
> -	*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
> +	*hpage = __alloc_pages_node(node, gfp | __GFP_PMD_ORDER);
>  	if (unlikely(!*hpage)) {
>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
>  		*hpage = ERR_PTR(-ENOMEM);
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 2219e747df49..bad60476d5ad 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -975,7 +975,7 @@ struct page *alloc_new_node_page(struct page *page, unsigned long node)
>  		return thp;
>  	} else
>  		return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE |
> -						    __GFP_THISNODE, 0);
> +						    __GFP_THISNODE);
>  }
>  
>  /*
> @@ -2006,12 +2006,11 @@ bool mempolicy_nodemask_intersects(struct task_struct *tsk,
>  
>  /* Allocate a page in interleaved policy.
>     Own path because it needs to do special accounting. */
> -static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
> -					unsigned nid)
> +static struct page *alloc_page_interleave(gfp_t gfp, unsigned nid)
>  {
>  	struct page *page;
>  
> -	page = __alloc_pages(gfp, order, nid);
> +	page = __alloc_pages(gfp, nid);
>  	/* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */
>  	if (!static_branch_likely(&vm_numa_stat_key))
>  		return page;
> @@ -2033,7 +2032,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
>   *      %GFP_FS      allocation should not call back into a file system.
>   *      %GFP_ATOMIC  don't sleep.
>   *
> - *	@order:Order of the GFP allocation.
>   * 	@vma:  Pointer to VMA or NULL if not available.
>   *	@addr: Virtual Address of the allocation. Must be inside the VMA.
>   *	@node: Which node to prefer for allocation (modulo policy).
> @@ -2047,8 +2045,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
>   *	NULL when no page can be allocated.
>   */
>  struct page *
> -alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
> -		unsigned long addr, int node, bool hugepage)
> +alloc_pages_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr,
> +		int node, bool hugepage)
>  {
>  	struct mempolicy *pol;
>  	struct page *page;
> @@ -2060,9 +2058,10 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  	if (pol->mode == MPOL_INTERLEAVE) {
>  		unsigned nid;
>  
> -		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
> +		nid = interleave_nid(pol, vma, addr,
> +				PAGE_SHIFT + gfp_order(gfp));
>  		mpol_cond_put(pol);
> -		page = alloc_page_interleave(gfp, order, nid);
> +		page = alloc_page_interleave(gfp, nid);
>  		goto out;
>  	}
>  
> @@ -2086,14 +2085,14 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  		if (!nmask || node_isset(hpage_node, *nmask)) {
>  			mpol_cond_put(pol);
>  			page = __alloc_pages_node(hpage_node,
> -						gfp | __GFP_THISNODE, order);
> +						gfp | __GFP_THISNODE);
>  			goto out;
>  		}
>  	}
>  
>  	nmask = policy_nodemask(gfp, pol);
>  	preferred_nid = policy_node(gfp, pol, node);
> -	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
> +	page = __alloc_pages_nodemask(gfp, preferred_nid, nmask);
>  	mpol_cond_put(pol);
>  out:
>  	return page;
> @@ -2108,13 +2107,12 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>   *      	%GFP_HIGHMEM highmem allocation,
>   *      	%GFP_FS     don't call back into a file system.
>   *      	%GFP_ATOMIC don't sleep.
> - *	@order: Power of two of allocation size in pages. 0 is a single page.
>   *
>   *	Allocate a page from the kernel page pool.  When not in
> - *	interrupt context and apply the current process NUMA policy.
> + *	interrupt context apply the current process NUMA policy.
>   *	Returns NULL when no page can be allocated.
>   */
> -struct page *alloc_pages_current(gfp_t gfp, unsigned order)
> +struct page *alloc_pages_current(gfp_t gfp)
>  {
>  	struct mempolicy *pol = &default_policy;
>  	struct page *page;
> @@ -2127,9 +2125,9 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
>  	 * nor system default_policy
>  	 */
>  	if (pol->mode == MPOL_INTERLEAVE)
> -		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
> +		page = alloc_page_interleave(gfp, interleave_nodes(pol));
>  	else
> -		page = __alloc_pages_nodemask(gfp, order,
> +		page = __alloc_pages_nodemask(gfp,
>  				policy_node(gfp, pol, numa_node_id()),
>  				policy_nodemask(gfp, pol));
>  
> diff --git a/mm/migrate.c b/mm/migrate.c
> index f2ecc2855a12..acb479132398 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1884,7 +1884,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
>  					 (GFP_HIGHUSER_MOVABLE |
>  					  __GFP_THISNODE | __GFP_NOMEMALLOC |
>  					  __GFP_NORETRY | __GFP_NOWARN) &
> -					 ~__GFP_RECLAIM, 0);
> +					 ~__GFP_RECLAIM);
>  
>  	return newpage;
>  }
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index deea16489e2b..13191fe2f19d 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4610,11 +4610,11 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
>   * This is the 'heart' of the zoned buddy allocator.
>   */
>  struct page *
> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
> -							nodemask_t *nodemask)
> +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask)
>  {
>  	struct page *page;
>  	unsigned int alloc_flags = ALLOC_WMARK_LOW;
> +	int order = gfp_order(gfp_mask);
>  	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
>  	struct alloc_context ac = { };
>  
> diff --git a/mm/shmem.c b/mm/shmem.c
> index a1e9f6194138..445e76e5c0c2 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1463,8 +1463,9 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
>  		return NULL;
>  
>  	shmem_pseudo_vma_init(&pvma, info, hindex);
> -	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
> -			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
> +	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY |
> +				__GFP_NOWARN | __GFP_PMD_ORDER,
> +			&pvma, 0, numa_node_id(), true);
>  	shmem_pseudo_vma_destroy(&pvma);
>  	if (page)
>  		prep_transhuge_page(page);
> diff --git a/mm/slub.c b/mm/slub.c
> index a34fbe1f6ede..7504fa3f844b 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1497,7 +1497,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
>  	if (node == NUMA_NO_NODE)
>  		page = alloc_pages(flags, order);
>  	else
> -		page = __alloc_pages_node(node, flags, order);
> +		page = __alloc_pages_node(node, flags | __GFP_ORDER(order));
>  
>  	if (page && memcg_charge_slab(page, flags, order, s)) {
>  		__free_pages(page, order);
> -- 
> 2.20.1
> 



