On Tue, 2024-01-02 at 21:12 +0800, Gang Li wrote:
> The parallelization of hugetlb allocation leads to errors when sharing
> h->next_nid_to_alloc across different threads. To address this, it's

Suggest you say

With parallelization of hugetlb allocation across different threads, each
thread works on a different node to allocate pages from, instead of all
allocating from the shared h->next_nid_to_alloc. To address this, it's

> necessary to assign a separate next_nid_to_alloc for each thread.
>
> Consequently, the hstate_next_node_to_alloc and for_each_node_mask_to_alloc
> have been modified to directly accept a *next_nid_to_alloc parameter,
> ensuring thread-specific allocation and avoiding concurrent access issues.
>
> Signed-off-by: Gang Li <gang.li@xxxxxxxxx>
> ---
> This patch seems not elegant, but I can't come up with anything better.
> Any suggestions will be highly appreciated!
> ---
>  mm/hugetlb.c | 22 ++++++++++++----------
>  1 file changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 92448e747991d..a71bc1622b53b 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1464,15 +1464,15 @@ static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
>   * next node from which to allocate, handling wrap at end of node
>   * mask.
>   */
> -static int hstate_next_node_to_alloc(struct hstate *h,
> +static int hstate_next_node_to_alloc(int *next_nid_to_alloc,
>  					nodemask_t *nodes_allowed)
>  {
>  	int nid;
>
>  	VM_BUG_ON(!nodes_allowed);
>
> -	nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
> -	h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
> +	nid = get_valid_node_allowed(*next_nid_to_alloc, nodes_allowed);
> +	*next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
>
>  	return nid;
>  }
> @@ -1495,10 +1495,10 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
>  	return nid;
>  }
>
> -#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)		\
> +#define for_each_node_mask_to_alloc(next_nid_to_alloc, nr_nodes, node, mask)	\
>  	for (nr_nodes = nodes_weight(*mask);				\
>  		nr_nodes > 0 &&						\
> -		((node = hstate_next_node_to_alloc(hs, mask)) || 1);	\
> +		((node = hstate_next_node_to_alloc(next_nid_to_alloc, mask)) || 1);	\
>  		nr_nodes--)
>
>  #define for_each_node_mask_to_free(hs, nr_nodes, node, mask)		\
> @@ -2350,12 +2350,13 @@ static void prep_and_add_allocated_folios(struct hstate *h,
>   */
>  static struct folio *alloc_pool_huge_folio(struct hstate *h,
>  					nodemask_t *nodes_allowed,
> -					nodemask_t *node_alloc_noretry)
> +					nodemask_t *node_alloc_noretry,
> +					int *next_nid_to_alloc)
>  {
>  	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
>  	int nr_nodes, node;
>
> -	for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
> +	for_each_node_mask_to_alloc(next_nid_to_alloc, nr_nodes, node, nodes_allowed) {
>  		struct folio *folio;
>
>  		folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, node,
> @@ -3310,7 +3311,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
>  		goto found;
>  	}
>  	/* allocate from next node when distributing huge pages */
> -	for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
> +	for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, &node_states[N_MEMORY]) {
>  		m = memblock_alloc_try_nid_raw(
>  				huge_page_size(h), huge_page_size(h),
>  				0, MEMBLOCK_ALLOC_ACCESSIBLE, node);
> @@ -3684,7 +3685,7 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
>  	VM_BUG_ON(delta != -1 && delta != 1);
>
>  	if (delta < 0) {
> -		for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
> +		for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, nodes_allowed) {
>  			if (h->surplus_huge_pages_node[node])
>  				goto found;
>  		}
> @@ -3799,7 +3800,8 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
>  		cond_resched();
>
>  		folio = alloc_pool_huge_folio(h, nodes_allowed,
> -					node_alloc_noretry);
> +					node_alloc_noretry,
> +					&h->next_nid_to_alloc);
>  		if (!folio) {
>  			prep_and_add_allocated_folios(h, &page_list);
>  			spin_lock_irq(&hugetlb_lock);
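To make the intended calling convention concrete: below is a minimal sketch of
how a per-thread caller (presumably a later patch in this series) might use the
reworked alloc_pool_huge_folio(). The worker context struct and function names
here are made up for illustration; they are not part of this patch.

	/*
	 * Hypothetical per-thread state: each worker carries its own
	 * next_nid_to_alloc, so no thread touches the shared
	 * h->next_nid_to_alloc concurrently.
	 */
	struct hugetlb_alloc_worker {
		int next_nid_to_alloc;
	};

	static void worker_alloc_pool_folios(struct hstate *h,
					     struct hugetlb_alloc_worker *w,
					     unsigned long nr,
					     nodemask_t *nodes_allowed,
					     nodemask_t *node_alloc_noretry)
	{
		while (nr--) {
			struct folio *folio;

			/* pass the worker-private nid, not &h->next_nid_to_alloc */
			folio = alloc_pool_huge_folio(h, nodes_allowed,
						      node_alloc_noretry,
						      &w->next_nid_to_alloc);
			if (!folio)
				break;
			/* folio would then be prepped and added to the pool */
		}
	}

With that shape, the serial paths keep passing &h->next_nid_to_alloc (as the
hunks above do), while parallel workers each round-robin over the nodemask
using their own counter.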