On Mon, May 06, 2019 at 09:06:00PM -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
> 
> Save marshalling an extra argument in all the callers at the expense of
> using five bits of the GFP flags.  We still have three GFP bits remaining
> after doing this (and we can release one more by reallocating NORETRY,
> RETRY_MAYFAIL and NOFAIL).
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  arch/x86/events/intel/ds.c |  4 +--
>  arch/x86/kvm/vmx/vmx.c     |  4 +--
>  include/linux/gfp.h        | 51 ++++++++++++++++++++++----------------
>  include/linux/migrate.h    |  2 +-
>  mm/filemap.c               |  2 +-
>  mm/gup.c                   |  4 +--
>  mm/hugetlb.c               |  5 ++--
>  mm/khugepaged.c            |  2 +-
>  mm/mempolicy.c             | 30 +++++++++++-----------
>  mm/migrate.c               |  2 +-
>  mm/page_alloc.c            |  4 +--
>  mm/shmem.c                 |  5 ++--
>  mm/slub.c                  |  2 +-
>  13 files changed, 63 insertions(+), 54 deletions(-)
> 
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 10c99ce1fead..82fee9845b87 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -315,13 +315,13 @@ static void ds_clear_cea(void *cea, size_t size)
>  	preempt_enable();
>  }
>  
> -static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
> +static void *dsalloc_pages(size_t size, gfp_t gfp, int cpu)
>  {
>  	unsigned int order = get_order(size);
>  	int node = cpu_to_node(cpu);
>  	struct page *page;
>  
> -	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
> +	page = __alloc_pages_node(node, gfp | __GFP_ZERO | __GFP_ORDER(order));

Order was derived from size in this function.  Is this truly equal to the
old function?

At a minimum, if I am wrong, the get_order() call above should be removed,
no?

Ira

>  	return page ? page_address(page) : NULL;
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ab432a930ae8..323a0f6ffe13 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2380,13 +2380,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>  	return 0;
>  }
>  
> -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
> +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t gfp)
>  {
>  	int node = cpu_to_node(cpu);
>  	struct page *pages;
>  	struct vmcs *vmcs;
>  
> -	pages = __alloc_pages_node(node, flags, vmcs_config.order);
> +	pages = __alloc_pages_node(node, gfp | __GFP_ORDER(vmcs_config.order));
>  	if (!pages)
>  		return NULL;
>  	vmcs = page_address(pages);
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index fb07b503dc45..e7845c2510db 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -219,6 +219,18 @@ struct vm_area_struct;
>  /* Room for N __GFP_FOO bits */
>  #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
>  #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
> +#define __GFP_ORDER(order)	((__force gfp_t)(order << __GFP_BITS_SHIFT))
> +#define __GFP_ORDER_PMD		__GFP_ORDER(PMD_SHIFT - PAGE_SHIFT)
> +#define __GFP_ORDER_PUD		__GFP_ORDER(PUD_SHIFT - PAGE_SHIFT)
> +
> +/*
> + * Extract the order from a GFP bitmask.
> + * Must be the top bits to avoid an AND operation.  Don't let
> + * __GFP_BITS_SHIFT get over 27, or we won't be able to encode orders
> + * above 15 (some architectures allow configuring MAX_ORDER up to 64,
> + * but I doubt larger than 31 are ever used).
> + */
> +#define gfp_order(gfp)	(((__force unsigned int)gfp) >> __GFP_BITS_SHIFT)
>  
>  /**
>   * DOC: Useful GFP flag combinations
> @@ -464,26 +476,23 @@ static inline void arch_alloc_page(struct page *page, int order) { }
>  #endif
>  
>  struct page *
> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
> -							nodemask_t *nodemask);
> +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask);
>  
> -static inline struct page *
> -__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
> +static inline struct page *__alloc_pages(gfp_t gfp_mask, int preferred_nid)
>  {
> -	return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
> +	return __alloc_pages_nodemask(gfp_mask, preferred_nid, NULL);
>  }
>  
>  /*
>   * Allocate pages, preferring the node given as nid. The node must be valid and
>   * online. For more general interface, see alloc_pages_node().
>   */
> -static inline struct page *
> -__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
> +static inline struct page *__alloc_pages_node(int nid, gfp_t gfp)
>  {
>  	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
> -	VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
> +	VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
>  
> -	return __alloc_pages(gfp_mask, order, nid);
> +	return __alloc_pages(gfp, nid);
>  }
>  
>  /*
> @@ -497,35 +506,35 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
>  	if (nid == NUMA_NO_NODE)
>  		nid = numa_mem_id();
>  
> -	return __alloc_pages_node(nid, gfp_mask, order);
> +	return __alloc_pages_node(nid, gfp_mask | __GFP_ORDER(order));
>  }
>  
>  #ifdef CONFIG_NUMA
> -extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
> +extern struct page *alloc_pages_current(gfp_t gfp_mask);
>  
>  static inline struct page *
>  alloc_pages(gfp_t gfp_mask, unsigned int order)
>  {
> -	return alloc_pages_current(gfp_mask, order);
> +	return alloc_pages_current(gfp_mask | __GFP_ORDER(order));
>  }
> -extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
> -			struct vm_area_struct *vma, unsigned long addr,
> -			int node, bool hugepage);
> +extern struct page *alloc_pages_vma(gfp_t gfp_mask, struct vm_area_struct *vma,
> +		unsigned long addr, int node, bool hugepage);
>  #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
> -	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
> +	alloc_pages_vma(gfp_mask | __GFP_ORDER(order), vma, addr, \
> +			numa_node_id(), true)
>  #else
>  #define alloc_pages(gfp_mask, order) \
> -		alloc_pages_node(numa_node_id(), gfp_mask, order)
> -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
> -	alloc_pages(gfp_mask, order)
> +	alloc_pages_node(numa_node_id(), gfp_mask, order)
> +#define alloc_pages_vma(gfp_mask, vma, addr, node, false) \
> +	alloc_pages(gfp_mask, 0)
>  #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
>  	alloc_pages(gfp_mask, order)
>  #endif
>  #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
>  #define alloc_page_vma(gfp_mask, vma, addr) \
> -	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
> +	alloc_pages_vma(gfp_mask, vma, addr, numa_node_id(), false)
>  #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
> -	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
> +	alloc_pages_vma(gfp_mask, vma, addr, node, false)
>  
>  extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
>  extern unsigned long get_zeroed_page(gfp_t gfp_mask);
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index e13d9bf2f9a5..ba4385144cc9 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -50,7 +50,7 @@ static inline struct page *new_page_nodemask(struct page *page,
>  	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
>  		gfp_mask |= __GFP_HIGHMEM;
>  
> -	new_page = __alloc_pages_nodemask(gfp_mask, order,
> +	new_page = __alloc_pages_nodemask(gfp_mask | __GFP_ORDER(order),
>  				preferred_nid, nodemask);
>  
>  	if (new_page && PageTransHuge(new_page))
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 3ad18fa56057..b7b0841312c9 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -945,7 +945,7 @@ struct page *__page_cache_alloc(gfp_t gfp)
>  		do {
>  			cpuset_mems_cookie = read_mems_allowed_begin();
>  			n = cpuset_mem_spread_node();
> -			page = __alloc_pages_node(n, gfp, 0);
> +			page = __alloc_pages_node(n, gfp);
>  		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
>  
>  		return page;
> diff --git a/mm/gup.c b/mm/gup.c
> index 294e87ae5b9a..7b06962a4630 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1306,14 +1306,14 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
>  		 * CMA area again.
>  		 */
>  		thp_gfpmask &= ~__GFP_MOVABLE;
> -		thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
> +		thp = __alloc_pages_node(nid, thp_gfpmask | __GFP_PMD_ORDER);
>  		if (!thp)
>  			return NULL;
>  		prep_transhuge_page(thp);
>  		return thp;
>  	}
>  
> -	return __alloc_pages_node(nid, gfp_mask, 0);
> +	return __alloc_pages_node(nid, gfp_mask);
>  }
>  
>  static long check_and_migrate_cma_pages(struct task_struct *tsk,
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 109f5de82910..f3f0f2902a52 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1401,10 +1401,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
>  	int order = huge_page_order(h);
>  	struct page *page;
>  
> -	gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
> +	gfp_mask |= __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
> +			__GFP_ORDER(order);
>  	if (nid == NUMA_NO_NODE)
>  		nid = numa_mem_id();
> -	page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
> +	page = __alloc_pages_nodemask(gfp_mask, nid, nmask);
>  	if (page)
>  		__count_vm_event(HTLB_BUDDY_PGALLOC);
>  	else
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index a335f7c1fac4..3d9267394881 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -770,7 +770,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
>  {
>  	VM_BUG_ON_PAGE(*hpage, *hpage);
>  
> -	*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
> +	*hpage = __alloc_pages_node(node, gfp | __GFP_PMD_ORDER);
>  	if (unlikely(!*hpage)) {
>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
>  		*hpage = ERR_PTR(-ENOMEM);
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 2219e747df49..bad60476d5ad 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -975,7 +975,7 @@ struct page *alloc_new_node_page(struct page *page, unsigned long node)
>  		return thp;
>  	} else
>  		return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE |
> -						    __GFP_THISNODE, 0);
> +						    __GFP_THISNODE);
>  }
>  
>  /*
> @@ -2006,12 +2006,11 @@ bool mempolicy_nodemask_intersects(struct task_struct *tsk,
>  
>  /* Allocate a page in interleaved policy.
>     Own path because it needs to do special accounting. */
> -static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
> -					unsigned nid)
> +static struct page *alloc_page_interleave(gfp_t gfp, unsigned nid)
>  {
>  	struct page *page;
>  
> -	page = __alloc_pages(gfp, order, nid);
> +	page = __alloc_pages(gfp, nid);
>  	/* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */
>  	if (!static_branch_likely(&vm_numa_stat_key))
>  		return page;
> @@ -2033,7 +2032,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
>   *	%GFP_FS      allocation should not call back into a file system.
>   *	%GFP_ATOMIC  don't sleep.
>   *
> - *	@order:Order of the GFP allocation.
>   *	@vma:  Pointer to VMA or NULL if not available.
>   *	@addr: Virtual Address of the allocation. Must be inside the VMA.
>   *	@node: Which node to prefer for allocation (modulo policy).
> @@ -2047,8 +2045,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
>   *	NULL when no page can be allocated.
>   */
>  struct page *
> -alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
> -		unsigned long addr, int node, bool hugepage)
> +alloc_pages_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr,
> +		int node, bool hugepage)
>  {
>  	struct mempolicy *pol;
>  	struct page *page;
> @@ -2060,9 +2058,10 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  	if (pol->mode == MPOL_INTERLEAVE) {
>  		unsigned nid;
>  
> -		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
> +		nid = interleave_nid(pol, vma, addr,
> +					PAGE_SHIFT + gfp_order(gfp));
>  		mpol_cond_put(pol);
> -		page = alloc_page_interleave(gfp, order, nid);
> +		page = alloc_page_interleave(gfp, nid);
>  		goto out;
>  	}
>  
> @@ -2086,14 +2085,14 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  		if (!nmask || node_isset(hpage_node, *nmask)) {
>  			mpol_cond_put(pol);
>  			page = __alloc_pages_node(hpage_node,
> -						gfp | __GFP_THISNODE, order);
> +						gfp | __GFP_THISNODE);
>  			goto out;
>  		}
>  	}
>  
>  	nmask = policy_nodemask(gfp, pol);
>  	preferred_nid = policy_node(gfp, pol, node);
> -	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
> +	page = __alloc_pages_nodemask(gfp, preferred_nid, nmask);
>  	mpol_cond_put(pol);
>  out:
>  	return page;
> @@ -2108,13 +2107,12 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>   *	%GFP_HIGHMEM highmem allocation,
>   *	%GFP_FS      don't call back into a file system.
>   *	%GFP_ATOMIC  don't sleep.
> - *	@order: Power of two of allocation size in pages. 0 is a single page.
>   *
>   *	Allocate a page from the kernel page pool.  When not in
> - *	interrupt context and apply the current process NUMA policy.
> + *	interrupt context apply the current process NUMA policy.
>   *	Returns NULL when no page can be allocated.
>   */
> -struct page *alloc_pages_current(gfp_t gfp, unsigned order)
> +struct page *alloc_pages_current(gfp_t gfp)
>  {
>  	struct mempolicy *pol = &default_policy;
>  	struct page *page;
> @@ -2127,9 +2125,9 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
>  	 * nor system default_policy
>  	 */
>  	if (pol->mode == MPOL_INTERLEAVE)
> -		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
> +		page = alloc_page_interleave(gfp, interleave_nodes(pol));
>  	else
> -		page = __alloc_pages_nodemask(gfp, order,
> +		page = __alloc_pages_nodemask(gfp,
>  				policy_node(gfp, pol, numa_node_id()),
>  				policy_nodemask(gfp, pol));
>  
> diff --git a/mm/migrate.c b/mm/migrate.c
> index f2ecc2855a12..acb479132398 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1884,7 +1884,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
>  					  (GFP_HIGHUSER_MOVABLE |
>  					   __GFP_THISNODE | __GFP_NOMEMALLOC |
>  					   __GFP_NORETRY | __GFP_NOWARN) &
> -					  ~__GFP_RECLAIM, 0);
> +					  ~__GFP_RECLAIM);
>  
>  	return newpage;
>  }
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index deea16489e2b..13191fe2f19d 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4610,11 +4610,11 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
>   * This is the 'heart' of the zoned buddy allocator.
>   */
>  struct page *
> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
> -							nodemask_t *nodemask)
> +__alloc_pages_nodemask(gfp_t gfp_mask, int preferred_nid, nodemask_t *nodemask)
>  {
>  	struct page *page;
>  	unsigned int alloc_flags = ALLOC_WMARK_LOW;
> +	int order = gfp_order(gfp_mask);
>  	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
>  	struct alloc_context ac = { };
>  
> diff --git a/mm/shmem.c b/mm/shmem.c
> index a1e9f6194138..445e76e5c0c2 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1463,8 +1463,9 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
>  		return NULL;
>  
>  	shmem_pseudo_vma_init(&pvma, info, hindex);
> -	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
> -			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
> +	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY |
> +				__GFP_NOWARN | __GFP_PMD_ORDER,
> +			&pvma, 0, numa_node_id(), true);
>  	shmem_pseudo_vma_destroy(&pvma);
>  	if (page)
>  		prep_transhuge_page(page);
> diff --git a/mm/slub.c b/mm/slub.c
> index a34fbe1f6ede..7504fa3f844b 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1497,7 +1497,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
>  	if (node == NUMA_NO_NODE)
>  		page = alloc_pages(flags, order);
>  	else
> -		page = __alloc_pages_node(node, flags, order);
> +		page = __alloc_pages_node(node, flags | __GFP_ORDER(order));
>  
>  	if (page && memcg_charge_slab(page, flags, order, s)) {
>  		__free_pages(page, order);
> --
> 2.20.1
> 
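
FWIW, a minimal userspace sketch of just the encode/decode round-trip
(separate from the dsalloc_pages() question above).  It re-creates the two
macros from the patch locally and assumes __GFP_BITS_SHIFT is 23 (i.e.
CONFIG_LOCKDEP=n); the gfp value is an arbitrary stand-in for real GFP
flags and __force is dropped since there is no sparse involved here:

	#include <assert.h>
	#include <stdio.h>

	typedef unsigned int gfp_t;

	#define __GFP_BITS_SHIFT	23	/* assumes CONFIG_LOCKDEP=n */
	#define __GFP_BITS_MASK		((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
	#define __GFP_ORDER(order)	((gfp_t)((order) << __GFP_BITS_SHIFT))
	#define gfp_order(gfp)		(((unsigned int)(gfp)) >> __GFP_BITS_SHIFT)

	int main(void)
	{
		gfp_t gfp = 0x6000c0;	/* made-up stand-in for real GFP flags */
		unsigned int order;

		for (order = 0; order <= 15; order++) {
			gfp_t packed = (gfp & __GFP_BITS_MASK) | __GFP_ORDER(order);

			/* the order comes back out unchanged ... */
			assert(gfp_order(packed) == order);
			/* ... and the flag bits below the shift are untouched */
			assert((packed & __GFP_BITS_MASK) == gfp);
		}
		printf("orders 0-15 round-trip through the top gfp bits\n");
		return 0;
	}

This only shows that the bit packing itself round-trips for small orders; it
says nothing about whether every caller's order fits in the bits actually
left over, which is what the comment about keeping __GFP_BITS_SHIFT at or
below 27 is warning about.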