On Wed, Nov 2, 2022 at 12:39 AM Michal Hocko <mhocko@xxxxxxxx> wrote:
>
> On Tue 01-11-22 12:13:35, Zach O'Keefe wrote:
> [...]
> > This is slightly tangential -- I don't want to send a new mail
> > about it -- but I wonder if we should be doing __GFP_THISNODE +
> > explicit node vs having hpage_collapse_find_target_node() set a
> > nodemask. We could then provide fallback nodes for ties, or if some
> > node contained > some threshold number of pages.
>
> I would simply go with something like this (not even compile tested):

Thanks, Michal. It is definitely an option. As I discussed with Zach, I'm
not sure it is worth making the code more complicated for such a
micro-optimization. Removing __GFP_THISNODE, or even removing the node
balance code altogether, should be fine too IMHO. TBH I doubt there would
be any noticeable difference either way.
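To make the second option concrete (a rough, untested sketch only, not a
real patch): dropping the balance code would leave
hpage_collapse_find_target_node() as just the max-scan loop, with the
allocation side and __GFP_THISNODE left untouched:

/*
 * Rough sketch only: return the node with the highest scan count and
 * drop the last_target_node round-robin tie-breaking entirely.
 */
static int hpage_collapse_find_target_node(struct collapse_control *cc)
{
        int nid, target_node = 0, max_value = 0;

        /* find the first node with the max normal pages hit */
        for (nid = 0; nid < MAX_NUMNODES; nid++) {
                if (cc->node_load[nid] > max_value) {
                        max_value = cc->node_load[nid];
                        target_node = nid;
                }
        }

        return target_node;
}

The only behavioral change is that ties always resolve to the lowest node
id.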
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 4734315f7940..947a5158fe11 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -96,9 +96,6 @@ struct collapse_control {
>
>  	/* Num pages scanned per node */
>  	u32 node_load[MAX_NUMNODES];
> -
> -	/* Last target selected in hpage_collapse_find_target_node() */
> -	int last_target_node;
>  };
>
>  /**
> @@ -734,7 +731,6 @@ static void khugepaged_alloc_sleep(void)
>
>  struct collapse_control khugepaged_collapse_control = {
>  	.is_khugepaged = true,
> -	.last_target_node = NUMA_NO_NODE,
>  };
>
>  static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc)
> @@ -772,7 +768,7 @@ static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
>  }
>
>  #ifdef CONFIG_NUMA
> -static int hpage_collapse_find_target_node(struct collapse_control *cc)
> +static int hpage_collapse_find_target_node(struct collapse_control *cc, nodemask_t *alloc_mask)
>  {
>  	int nid, target_node = 0, max_value = 0;
>
> @@ -783,28 +779,25 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
>  			target_node = nid;
>  		}
>
> +	nodes_clear(*alloc_mask);
>  	/* do some balance if several nodes have the same hit record */
> -	if (target_node <= cc->last_target_node)
> -		for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
> -		     nid++)
> -			if (max_value == cc->node_load[nid]) {
> -				target_node = nid;
> -				break;
> -			}
> +	for_each_online_node(nid) {
> +		if (max_value == cc->node_load[nid])
> +			node_set(nid, *alloc_mask);
> +	}
>
> -	cc->last_target_node = target_node;
>  	return target_node;
>  }
>  #else
> -static int hpage_collapse_find_target_node(struct collapse_control *cc)
> +static int hpage_collapse_find_target_node(struct collapse_control *cc, nodemask_t *alloc_mask)
>  {
>  	return 0;
>  }
>  #endif
>
> -static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node)
> +static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, nodemask_t *nmask)
>  {
> -	*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
> +	*hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask);
>  	if (unlikely(!*hpage)) {
>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
>  		return false;
> @@ -958,9 +951,18 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
>  	/* Only allocate from the target node */
>  	gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
>  		     GFP_TRANSHUGE) | __GFP_THISNODE;
> -	int node = hpage_collapse_find_target_node(cc);
> +	NODEMASK_ALLOC(nodemask_t, nmask, GFP_KERNEL);
> +	int node;
> +	int ret;
> +
> +	if (!nmask)
> +		return SCAN_ALLOC_HUGE_PAGE_FAIL;
> +
> +	node = hpage_collapse_find_target_node(cc, nmask);
> +	ret = hpage_collapse_alloc_page(hpage, gfp, node, nmask);
> +	NODEMASK_FREE(nmask);
>
> -	if (!hpage_collapse_alloc_page(hpage, gfp, node))
> +	if (!ret)
>  		return SCAN_ALLOC_HUGE_PAGE_FAIL;
>  	if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
>  		return SCAN_CGROUP_CHARGE_FAIL;
> @@ -2576,7 +2578,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
>  	if (!cc)
>  		return -ENOMEM;
>  	cc->is_khugepaged = false;
> -	cc->last_target_node = NUMA_NO_NODE;
>
>  	mmgrab(mm);
>  	lru_add_drain_all();
> --
> Michal Hocko
> SUSE Labs