Re: [PATCH resend] mm: hugetlb_vmemmap: use bulk allocator in alloc_vmemmap_page_list()

On Wed, Sep 06, 2023 at 03:47:42AM +0100, Matthew Wilcox wrote:
> On Tue, Sep 05, 2023 at 06:35:08PM +0800, Kefeng Wang wrote:
> > alloc_vmemmap_page_list() needs to allocate 4095 pages (1G) or 7 pages
> > (2M) at once, so let's add a bulk allocator variant,
> > alloc_pages_bulk_list_node(), and switch alloc_vmemmap_page_list() over
> > to it to accelerate page allocation.
> 
> Argh, no, please don't do this.
> 
> Iterating a linked list is _expensive_.  It is about 10x quicker to
> iterate an array than a linked list.  Adding the list_head option
> to __alloc_pages_bulk() was a colossal mistake.  Don't perpetuate it.
> 
> These pages are going into an array anyway.  Don't put them on a list
> first.
> 
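
Right, and for what it's worth the caller-side conversion is simple with the
existing array helper.  Something along these lines (untested sketch, the
function and variable names are made up for illustration, this is not the
actual hugetlb_vmemmap code):

#include <linux/gfp.h>

/*
 * Illustration only: bulk-allocate nr_pages order-0 pages from @nid into a
 * caller-provided array, falling back to single-page allocations for any
 * slots the bulk allocator left empty.  The array must start out NULL-filled
 * because only NULL slots are populated.
 */
static int fill_page_array(int nid, unsigned long nr_pages, struct page **pages)
{
	unsigned long i, nr;

	/* Returns how many leading slots of pages[] are now populated. */
	nr = alloc_pages_bulk_array_node(GFP_KERNEL, nid, nr_pages, pages);

	for (i = nr; i < nr_pages; i++) {
		pages[i] = alloc_pages_node(nid, GFP_KERNEL, 0);
		if (!pages[i])
			return -ENOMEM;	/* caller frees pages[0..i) */
	}

	return 0;
}

No list walking anywhere, and the pages end up exactly where the caller
wants them.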

I don't have access to my test infrastructure at the moment, so this isn't
even properly build tested, but I think it's time for something like this:

--8<--
mm: page_alloc: Remove list interface for bulk page allocation

Commit 387ba26fb1cb ("mm/page_alloc: add a bulk page allocator") introduced
an API for bulk allocating pages stored on a list. An array interface was
added later because it was faster for the initial use cases. Two years on,
the array option remains superior for every use case, and every attempt to
use the list interface has ultimately ended up using arrays.  Remove the
list-based API for bulk page allocation and rename the remaining array-based
functions accordingly.

Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
---
 include/linux/gfp.h   | 17 +++++------------
 mm/mempolicy.c        | 21 ++++++++++-----------
 mm/page_alloc.c       | 24 ++++++------------------
 mm/vmalloc.c          |  4 ++--
 net/core/page_pool.c  |  4 ++--
 net/sunrpc/svc.c      |  2 +-
 net/sunrpc/svc_xprt.c |  2 +-
 7 files changed, 27 insertions(+), 47 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 665f06675c83..d81eb255718a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -181,33 +181,26 @@ struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
 
 unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 				nodemask_t *nodemask, int nr_pages,
-				struct list_head *page_list,
 				struct page **page_array);
 
-unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+unsigned long alloc_pages_bulk_mempolicy(gfp_t gfp,
 				unsigned long nr_pages,
 				struct page **page_array);
 
 /* Bulk allocate order-0 pages */
 static inline unsigned long
-alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
+alloc_pages_bulk(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
 {
-	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
+	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, page_array);
 }
 
 static inline unsigned long
-alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
-{
-	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array);
-}
-
-static inline unsigned long
-alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
+alloc_pages_bulk_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
 {
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 
-	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
+	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, page_array);
 }
 
 static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 29ebf1e7898c..2a763df3e1ee 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2315,7 +2315,7 @@ struct folio *folio_alloc(gfp_t gfp, unsigned order)
 }
 EXPORT_SYMBOL(folio_alloc);
 
-static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
+static unsigned long alloc_pages_bulk_interleave(gfp_t gfp,
 		struct mempolicy *pol, unsigned long nr_pages,
 		struct page **page_array)
 {
@@ -2334,13 +2334,12 @@ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
 		if (delta) {
 			nr_allocated = __alloc_pages_bulk(gfp,
 					interleave_nodes(pol), NULL,
-					nr_pages_per_node + 1, NULL,
-					page_array);
+					nr_pages_per_node + 1, page_array);
 			delta--;
 		} else {
 			nr_allocated = __alloc_pages_bulk(gfp,
 					interleave_nodes(pol), NULL,
-					nr_pages_per_node, NULL, page_array);
+					nr_pages_per_node, page_array);
 		}
 
 		page_array += nr_allocated;
@@ -2350,7 +2349,7 @@ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
 	return total_allocated;
 }
 
-static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
+static unsigned long alloc_pages_bulk_preferred_many(gfp_t gfp, int nid,
 		struct mempolicy *pol, unsigned long nr_pages,
 		struct page **page_array)
 {
@@ -2361,11 +2360,11 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
 	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
 
 	nr_allocated  = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes,
-					   nr_pages, NULL, page_array);
+					   nr_pages, page_array);
 
 	if (nr_allocated < nr_pages)
 		nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL,
-				nr_pages - nr_allocated, NULL,
+				nr_pages - nr_allocated,
 				page_array + nr_allocated);
 	return nr_allocated;
 }
@@ -2376,7 +2375,7 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
  * It can accelerate memory allocation especially interleaving
  * allocate memory.
  */
-unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+unsigned long alloc_pages_bulk_mempolicy(gfp_t gfp,
 		unsigned long nr_pages, struct page **page_array)
 {
 	struct mempolicy *pol = &default_policy;
@@ -2385,15 +2384,15 @@ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
 		pol = get_task_policy(current);
 
 	if (pol->mode == MPOL_INTERLEAVE)
-		return alloc_pages_bulk_array_interleave(gfp, pol,
+		return alloc_pages_bulk_interleave(gfp, pol,
 							 nr_pages, page_array);
 
 	if (pol->mode == MPOL_PREFERRED_MANY)
-		return alloc_pages_bulk_array_preferred_many(gfp,
+		return alloc_pages_bulk_preferred_many(gfp,
 				numa_node_id(), pol, nr_pages, page_array);
 
 	return __alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()),
-				  policy_nodemask(gfp, pol), nr_pages, NULL,
+				  policy_nodemask(gfp, pol), nr_pages,
 				  page_array);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 85741403948f..8f035304fbf2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4221,23 +4221,20 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
  * @preferred_nid: The preferred NUMA node ID to allocate from
  * @nodemask: Set of nodes to allocate from, may be NULL
  * @nr_pages: The number of pages desired on the list or array
- * @page_list: Optional list to store the allocated pages
- * @page_array: Optional array to store the pages
+ * @page_array: Array to store the pages
  *
  * This is a batched version of the page allocator that attempts to
- * allocate nr_pages quickly. Pages are added to page_list if page_list
- * is not NULL, otherwise it is assumed that the page_array is valid.
+ * allocate nr_pages quickly with pointers stored in page_array.
  *
  * For lists, nr_pages is the number of pages that should be allocated.
  *
  * For arrays, only NULL elements are populated with pages and nr_pages
  * is the maximum number of pages that will be stored in the array.
  *
- * Returns the number of pages on the list or array.
+ * Returns the number of pages in the array.
  */
 unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 			nodemask_t *nodemask, int nr_pages,
-			struct list_head *page_list,
 			struct page **page_array)
 {
 	struct page *page;
@@ -4351,11 +4348,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_account++;
 
 		prep_new_page(page, 0, gfp, 0);
-		if (page_list)
-			list_add(&page->lru, page_list);
-		else
-			page_array[nr_populated] = page;
-		nr_populated++;
+		page_array[nr_populated++] = page;
 	}
 
 	pcp_spin_unlock(pcp);
@@ -4372,13 +4365,8 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 
 failed:
 	page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
-	if (page) {
-		if (page_list)
-			list_add(&page->lru, page_list);
-		else
-			page_array[nr_populated] = page;
-		nr_populated++;
-	}
+	if (page)
+		page_array[nr_populated++] = page;
 
 	goto out;
 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a3fedb3ee0db..46d49e04039e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3025,12 +3025,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			 * but mempolicy wants to alloc memory by interleaving.
 			 */
 			if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
-				nr = alloc_pages_bulk_array_mempolicy(bulk_gfp,
+				nr = alloc_pages_bulk_mempolicy(bulk_gfp,
 							nr_pages_request,
 							pages + nr_allocated);
 
 			else
-				nr = alloc_pages_bulk_array_node(bulk_gfp, nid,
+				nr = alloc_pages_bulk_node(bulk_gfp, nid,
 							nr_pages_request,
 							pages + nr_allocated);
 
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 77cb75e63aca..9cff3e4350ee 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -426,10 +426,10 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	if (unlikely(pool->alloc.count > 0))
 		return pool->alloc.cache[--pool->alloc.count];
 
-	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
+	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */
 	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
 
-	nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
+	nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk,
 					       pool->alloc.cache);
 	if (unlikely(!nr_pages))
 		return NULL;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 812fda9d45dd..afce749a0871 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -613,7 +613,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
 	if (pages > RPCSVC_MAXPAGES)
 		pages = RPCSVC_MAXPAGES;
 
-	ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
+	ret = alloc_pages_bulk_node(GFP_KERNEL, node, pages,
 					  rqstp->rq_pages);
 	return ret == pages;
 }
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 4cfe9640df48..92c6eae6610c 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -666,7 +666,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
 	}
 
 	for (filled = 0; filled < pages; filled = ret) {
-		ret = alloc_pages_bulk_array_node(GFP_KERNEL,
+		ret = alloc_pages_bulk_node(GFP_KERNEL,
 						  rqstp->rq_pool->sp_id,
 						  pages, rqstp->rq_pages);
 		if (ret > filled)
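
And for completeness, nothing is lost for a caller that genuinely wants the
pages on a list: it can bulk allocate into a small scratch array and splice
the pages onto its list itself.  Untested sketch with made-up names, using
the renamed alloc_pages_bulk() from the patch above:

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>

/*
 * Illustration only, not from the tree: allocate nr_pages order-0 pages and
 * add them to @list via a scratch array.  Returns the number of pages that
 * were actually allocated.
 */
static unsigned long alloc_pages_onto_list(unsigned long nr_pages,
					   struct list_head *list)
{
	struct page *scratch[16] = { NULL };
	unsigned long total = 0;

	while (total < nr_pages) {
		unsigned long want = nr_pages - total;
		unsigned long i, nr;

		if (want > ARRAY_SIZE(scratch))
			want = ARRAY_SIZE(scratch);

		/* Only NULL slots are filled, so clear them again below. */
		nr = alloc_pages_bulk(GFP_KERNEL, want, scratch);
		if (!nr)
			break;

		for (i = 0; i < nr; i++) {
			list_add(&scratch[i]->lru, list);
			scratch[i] = NULL;	/* ready for the next round */
		}
		total += nr;
	}

	return total;
}

That keeps list handling out of the allocator's inner loop entirely, which
is the whole point of this patch.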



