On Tue 01-11-22 12:13:35, Zach O'Keefe wrote:
[...]
> This is slightly tangential - but I don't want to send a new mail
> about it -- but I wonder if we should be doing __GFP_THISNODE +
> explicit node vs having hpage_collapse_find_target_node() set a
> nodemask. We could then provide fallback nodes for ties, or if some
> node contained > some threshold number of pages.

I would simply go with something like this (not even compile tested):

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4734315f7940..947a5158fe11 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -96,9 +96,6 @@ struct collapse_control {
 
 	/* Num pages scanned per node */
 	u32 node_load[MAX_NUMNODES];
-
-	/* Last target selected in hpage_collapse_find_target_node() */
-	int last_target_node;
 };
 
 /**
@@ -734,7 +731,6 @@ static void khugepaged_alloc_sleep(void)
 
 struct collapse_control khugepaged_collapse_control = {
 	.is_khugepaged = true,
-	.last_target_node = NUMA_NO_NODE,
 };
 
 static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc)
@@ -772,7 +768,7 @@ static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
 }
 
 #ifdef CONFIG_NUMA
-static int hpage_collapse_find_target_node(struct collapse_control *cc)
+static int hpage_collapse_find_target_node(struct collapse_control *cc, nodemask_t *alloc_mask)
 {
 	int nid, target_node = 0, max_value = 0;
 
@@ -783,28 +779,25 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
 			target_node = nid;
 		}
 
+	nodes_clear(*alloc_mask);
 	/* do some balance if several nodes have the same hit record */
-	if (target_node <= cc->last_target_node)
-		for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
-		     nid++)
-			if (max_value == cc->node_load[nid]) {
-				target_node = nid;
-				break;
-			}
+	for_each_online_node(nid) {
+		if (max_value == cc->node_load[nid])
+			node_set(nid, *alloc_mask);
+	}
 
-	cc->last_target_node = target_node;
 	return target_node;
 }
 #else
-static int hpage_collapse_find_target_node(struct collapse_control *cc)
+static int hpage_collapse_find_target_node(struct collapse_control *cc, nodemask_t *alloc_mask)
 {
 	return 0;
 }
 #endif
 
-static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node)
+static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, nodemask_t *nmask)
 {
-	*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
+	*hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask);
 	if (unlikely(!*hpage)) {
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		return false;
@@ -958,9 +951,18 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
 	/* Only allocate from the target node */
 	gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
 		     GFP_TRANSHUGE) | __GFP_THISNODE;
-	int node = hpage_collapse_find_target_node(cc);
+	NODEMASK_ALLOC(nodemask_t, nmask, GFP_KERNEL);
+	int node;
+	int ret;
+
+	if (!nmask)
+		return SCAN_ALLOC_HUGE_PAGE_FAIL;
+
+	node = hpage_collapse_find_target_node(cc, nmask);
+	ret = hpage_collapse_alloc_page(hpage, gfp, node, nmask);
+	NODEMASK_FREE(nmask);
 
-	if (!hpage_collapse_alloc_page(hpage, gfp, node))
+	if (!ret)
 		return SCAN_ALLOC_HUGE_PAGE_FAIL;
 	if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
 		return SCAN_CGROUP_CHARGE_FAIL;
@@ -2576,7 +2578,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	if (!cc)
 		return -ENOMEM;
 	cc->is_khugepaged = false;
-	cc->last_target_node = NUMA_NO_NODE;
 
 	mmgrab(mm);
 	lru_add_drain_all();

-- 
Michal Hocko
SUSE Labs
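
For illustration only, here is a minimal user-space sketch of the tie-handling
idea the diff above proposes: pick the most-loaded node, then collect every
node tied at that maximum into a mask that can serve as the allocation
fallback set. The uint64_t bitmask, the fixed MAX_NODES, and find_target_node()
below are stand-ins for nodemask_t, MAX_NUMNODES and the kernel helper, not
actual kernel API.

/*
 * Stand-alone sketch (not kernel code) of the tie-breaking logic:
 * find the node with the highest scan count and record every node
 * with the same count in a fallback mask.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_NODES 8

static int find_target_node(const uint32_t load[MAX_NODES], uint64_t *alloc_mask)
{
	int nid, target_node = 0;
	uint32_t max_value = 0;

	/* Find the node with the highest hit count. */
	for (nid = 0; nid < MAX_NODES; nid++)
		if (load[nid] > max_value) {
			max_value = load[nid];
			target_node = nid;
		}

	/* Every node tied at the maximum becomes an allowed fallback. */
	*alloc_mask = 0;
	for (nid = 0; nid < MAX_NODES; nid++)
		if (load[nid] == max_value)
			*alloc_mask |= 1ULL << nid;

	return target_node;
}

int main(void)
{
	uint32_t load[MAX_NODES] = { 0, 12, 7, 12, 0, 3, 0, 0 };
	uint64_t mask;
	int node = find_target_node(load, &mask);

	/* Nodes 1 and 3 are tied, so both end up in the mask. */
	printf("target node %d, fallback mask 0x%llx\n",
	       node, (unsigned long long)mask);
	return 0;
}

With all tied nodes recorded in the mask, the allocation can fall back among
them directly instead of round-robining ties via last_target_node.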