On Wed 27-05-20 15:44:54, Joonsoo Kim wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
> 
> Currently, page allocation functions for migration requires some arguments.
> More worse, in the following patch, more argument will be needed to unify
> the similar functions. To simplify them, in this patch, unified data
> structure that controls allocation behaviour is introduced.
> 
> For clean-up, function declarations are re-ordered.

This is really hard to review without having a clear picture of the
resulting code, so bear with me.

I can see some reasons why allocation callbacks might benefit from an
aggregated argument, but you seem to touch the internal hugetlb
dequeue_huge_page_vma which shouldn't really need that. I wouldn't mind
much, but I remember the layering of the hugetlb allocation functions is
quite complex for hugetlb-specific reasons (see 0c397daea1d4 ("mm,
hugetlb: further simplify hugetlb allocation API") for more background).
Is there any reason why the aggregated argument cannot be limited only to
the migration callbacks? That would be alloc_huge_page_node,
alloc_huge_page_nodemask and alloc_huge_page_vma.
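Something along these lines is what I have in mind - just a sketch to
illustrate the point, not even compile tested. It reuses the alloc_control
naming and the nid/nmask fields from your patch, and it keeps gfp_mask out
of the structure, which may or may not be what you want for the later
patches. The migration callback takes the aggregated argument and unpacks
it, while the internal helpers keep their current plain arguments:

/* page migration callback function */
struct page *alloc_huge_page_nodemask(struct hstate *h,
				struct alloc_control *ac)
{
	/* gfp_mask stays an internal detail rather than part of alloc_control */
	gfp_t gfp_mask = htlb_alloc_mask(h);

	spin_lock(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0) {
		struct page *page;

		/* internal dequeue path keeps its existing signature */
		page = dequeue_huge_page_nodemask(h, gfp_mask,
						ac->nid, ac->nmask);
		if (page) {
			spin_unlock(&hugetlb_lock);
			return page;
		}
	}
	spin_unlock(&hugetlb_lock);

	return alloc_migrate_huge_page(h, gfp_mask, ac->nid, ac->nmask);
}

That way dequeue_huge_page_vma and the rest of the hugetlb internals would
not need to know about alloc_control at all.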
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
> ---
>  include/linux/hugetlb.h | 35 +++++++++++++++-------------
>  mm/gup.c                | 11 ++++++---
>  mm/hugetlb.c            | 62 ++++++++++++++++++++++++-------------------------
>  mm/internal.h           |  7 ++++++
>  mm/mempolicy.c          | 13 +++++++----
>  mm/migrate.c            | 13 +++++++----
>  6 files changed, 83 insertions(+), 58 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 50650d0..15c8fb8 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -14,6 +14,7 @@
>  struct ctl_table;
>  struct user_struct;
>  struct mmu_gather;
> +struct alloc_control;
> 
>  #ifndef is_hugepd
>  typedef struct { unsigned long pd; } hugepd_t;
> @@ -502,15 +503,16 @@ struct huge_bootmem_page {
>  	struct hstate *hstate;
>  };
> 
> -struct page *alloc_huge_page(struct vm_area_struct *vma,
> -				unsigned long addr, int avoid_reserve);
> -struct page *alloc_huge_page_node(struct hstate *h, int nid);
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> -				nodemask_t *nmask);
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> +				struct alloc_control *ac);
> +struct page *alloc_huge_page_node(struct hstate *h,
> +				struct alloc_control *ac);
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> +				struct alloc_control *ac);
>  struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
>  				unsigned long address);
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> -				int nid, nodemask_t *nmask);
> +struct page *alloc_huge_page(struct vm_area_struct *vma,
> +				unsigned long addr, int avoid_reserve);
>  int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
>  			pgoff_t idx);
> 
> @@ -752,20 +754,14 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
>  #else	/* CONFIG_HUGETLB_PAGE */
>  struct hstate {};
> 
> -static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> -					   unsigned long addr,
> -					   int avoid_reserve)
> -{
> -	return NULL;
> -}
> -
> -static inline struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +static inline struct page *
> +alloc_huge_page_node(struct hstate *h, struct alloc_control *ac)
>  {
>  	return NULL;
>  }
> 
>  static inline struct page *
> -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask)
> +alloc_huge_page_nodemask(struct hstate *h, struct alloc_control *ac)
>  {
>  	return NULL;
>  }
> @@ -777,6 +773,13 @@ static inline struct page *alloc_huge_page_vma(struct hstate *h,
>  	return NULL;
>  }
> 
> +static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> +					   unsigned long addr,
> +					   int avoid_reserve)
> +{
> +	return NULL;
> +}
> +
>  static inline int __alloc_bootmem_huge_page(struct hstate *h)
>  {
>  	return 0;
> diff --git a/mm/gup.c b/mm/gup.c
> index ee039d4..6b78f11 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1612,16 +1612,21 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
>  	if (PageHighMem(page))
>  		gfp_mask |= __GFP_HIGHMEM;
> 
> -#ifdef CONFIG_HUGETLB_PAGE
>  	if (PageHuge(page)) {
>  		struct hstate *h = page_hstate(page);
> +		struct alloc_control ac = {
> +			.nid = nid,
> +			.nmask = NULL,
> +			.gfp_mask = gfp_mask,
> +		};
> +
>  		/*
>  		 * We don't want to dequeue from the pool because pool pages will
>  		 * mostly be from the CMA region.
>  		 */
> -		return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> +		return alloc_migrate_huge_page(h, &ac);
>  	}
> -#endif
> +
>  	if (PageTransHuge(page)) {
>  		struct page *thp;
>  		/*
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 57ece74..453ba94 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1053,8 +1053,8 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
>  	return page;
>  }
> 
> -static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
> -						nodemask_t *nmask)
> +static struct page *dequeue_huge_page_nodemask(struct hstate *h,
> +						struct alloc_control *ac)
>  {
>  	unsigned int cpuset_mems_cookie;
>  	struct zonelist *zonelist;
> @@ -1062,14 +1062,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
>  	struct zoneref *z;
>  	int node = NUMA_NO_NODE;
> 
> -	zonelist = node_zonelist(nid, gfp_mask);
> +	zonelist = node_zonelist(ac->nid, ac->gfp_mask);
> 
>  retry_cpuset:
>  	cpuset_mems_cookie = read_mems_allowed_begin();
> -	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
> +	for_each_zone_zonelist_nodemask(zone, z, zonelist,
> +				gfp_zone(ac->gfp_mask), ac->nmask) {
>  		struct page *page;
> 
> -		if (!cpuset_zone_allowed(zone, gfp_mask))
> +		if (!cpuset_zone_allowed(zone, ac->gfp_mask))
>  			continue;
>  		/*
>  		 * no need to ask again on the same node. Pool is node rather than
> @@ -1105,9 +1106,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
>  {
>  	struct page *page;
>  	struct mempolicy *mpol;
> -	gfp_t gfp_mask;
> -	nodemask_t *nodemask;
> -	int nid;
> +	struct alloc_control ac = {0};
> 
>  	/*
>  	 * A child process with MAP_PRIVATE mappings created by their parent
> @@ -1122,9 +1121,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
>  	if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
>  		goto err;
> 
> -	gfp_mask = htlb_alloc_mask(h);
> -	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> -	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> +	ac.gfp_mask = htlb_alloc_mask(h);
> +	ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> +
> +	page = dequeue_huge_page_nodemask(h, &ac);
>  	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
>  		SetPagePrivate(page);
>  		h->resv_huge_pages--;
> @@ -1937,15 +1937,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
>  	return page;
>  }
> 
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> -				int nid, nodemask_t *nmask)
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> +				struct alloc_control *ac)
>  {
>  	struct page *page;
> 
>  	if (hstate_is_gigantic(h))
>  		return NULL;
> 
> -	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
> +	page = alloc_fresh_huge_page(h, ac->gfp_mask,
> +				ac->nid, ac->nmask, NULL);
>  	if (!page)
>  		return NULL;
> 
> @@ -1979,36 +1980,37 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
>  }
> 
>  /* page migration callback function */
> -struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +struct page *alloc_huge_page_node(struct hstate *h,
> +				struct alloc_control *ac)
>  {
> -	gfp_t gfp_mask = htlb_alloc_mask(h);
>  	struct page *page = NULL;
> 
> -	if (nid != NUMA_NO_NODE)
> -		gfp_mask |= __GFP_THISNODE;
> +	ac->gfp_mask = htlb_alloc_mask(h);
> +	if (ac->nid != NUMA_NO_NODE)
> +		ac->gfp_mask |= __GFP_THISNODE;
> 
>  	spin_lock(&hugetlb_lock);
>  	if (h->free_huge_pages - h->resv_huge_pages > 0)
> -		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
> +		page = dequeue_huge_page_nodemask(h, ac);
>  	spin_unlock(&hugetlb_lock);
> 
>  	if (!page)
> -		page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> +		page = alloc_migrate_huge_page(h, ac);
> 
>  	return page;
>  }
> 
>  /* page migration callback function */
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> -				nodemask_t *nmask)
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> +				struct alloc_control *ac)
>  {
> -	gfp_t gfp_mask = htlb_alloc_mask(h);
> +	ac->gfp_mask = htlb_alloc_mask(h);
> 
>  	spin_lock(&hugetlb_lock);
>  	if (h->free_huge_pages - h->resv_huge_pages > 0) {
>  		struct page *page;
> 
> -		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
> +		page = dequeue_huge_page_nodemask(h, ac);
>  		if (page) {
>  			spin_unlock(&hugetlb_lock);
>  			return page;
> @@ -2016,22 +2018,20 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
>  	}
>  	spin_unlock(&hugetlb_lock);
> 
> -	return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
> +	return alloc_migrate_huge_page(h, ac);
>  }
> 
>  /* mempolicy aware migration callback */
>  struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
>  		unsigned long address)
>  {
> +	struct alloc_control ac = {0};
>  	struct mempolicy *mpol;
> -	nodemask_t *nodemask;
>  	struct page *page;
> -	gfp_t gfp_mask;
> -	int node;
> 
> -	gfp_mask = htlb_alloc_mask(h);
> -	node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> -	page = alloc_huge_page_nodemask(h, node, nodemask);
> +	ac.gfp_mask = htlb_alloc_mask(h);
> +	ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> +	page = alloc_huge_page_nodemask(h, &ac);
>  	mpol_cond_put(mpol);
> 
>  	return page;
> diff --git a/mm/internal.h b/mm/internal.h
> index 9886db2..6e613ce 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -613,4 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page)
> 
>  void setup_zone_pageset(struct zone *zone);
>  extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
> +
> +struct alloc_control {
> +	int nid;		/* preferred node id */
> +	nodemask_t *nmask;
> +	gfp_t gfp_mask;
> +};
> +
>  #endif	/* __MM_INTERNAL_H */
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 3813206..3b6b551 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -1068,10 +1068,15 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
>  /* page allocation callback for NUMA node migration */
>  struct page *alloc_new_node_page(struct page *page, unsigned long node)
>  {
> -	if (PageHuge(page))
> -		return alloc_huge_page_node(page_hstate(compound_head(page)),
> -					node);
> -	else if (PageTransHuge(page)) {
> +	if (PageHuge(page)) {
> +		struct hstate *h = page_hstate(compound_head(page));
> +		struct alloc_control ac = {
> +			.nid = node,
> +			.nmask = NULL,
> +		};
> +
> +		return alloc_huge_page_node(h, &ac);
> +	} else if (PageTransHuge(page)) {
>  		struct page *thp;
> 
>  		thp = alloc_pages_node(node,
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 824c22e..30217537 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1544,10 +1544,15 @@ struct page *new_page_nodemask(struct page *page,
>  	unsigned int order = 0;
>  	struct page *new_page = NULL;
> 
> -	if (PageHuge(page))
> -		return alloc_huge_page_nodemask(
> -				page_hstate(compound_head(page)),
> -				preferred_nid, nodemask);
> +	if (PageHuge(page)) {
> +		struct hstate *h = page_hstate(compound_head(page));
> +		struct alloc_control ac = {
> +			.nid = preferred_nid,
> +			.nmask = nodemask,
> +		};
> +
> +		return alloc_huge_page_nodemask(h, &ac);
> +	}
> 
>  	if (PageTransHuge(page)) {
>  		gfp_mask |= GFP_TRANSHUGE;
> -- 
> 2.7.4
> 

-- 
Michal Hocko
SUSE Labs