[RFC] put page to pcp->lists[] tail if it is not on the same node

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



node
Reply-To: Wei Yang <richard.weiyang@xxxxxxxxx>

Masters,

While reading the code, I came up with the following idea.

    If we add some NUMA-node awareness to pcp->lists[], we may
    get better performance.

The idea is simple:

    Put page on other nodes to the tail of pcp->lists[], because we
    allocate from head and free from tail.

Since my desktop has just one NUMA node, I couldn't test the effect. I
just ran a kernel build test to see whether it would degrade the current
kernel. The results look reasonable.

                    make -j4 bzImage
           base-line:
           
           real    6m15.947s        
           user    21m14.481s       
           sys     2m34.407s        
           
           real    6m16.089s        
           user    21m18.295s       
           sys     2m35.551s        
           
           real    6m16.239s        
           user    21m17.590s       
           sys     2m35.252s        
           
           patched:
           
           real    6m14.558s
           user    21m18.374s
           sys     2m33.143s
           
           real    6m14.606s
           user    21m14.969s
           sys     2m32.039s
           
           real    6m15.264s
           user    21m16.698s
           sys     2m33.024s

Sorry for sending this without a real justification; I hope this does not
make you uncomfortable. I would be very glad if you could suggest some
verifications that I could do.

Below is my testing patch; I look forward to your comments.

>From 2f9a99521068dfe7ec98ea39f73649226d9a837b Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@xxxxxxxxx>
Date: Fri, 19 Oct 2018 11:37:09 +0800
Subject: [PATCH] mm: put page to pcp->lists[] tail if it is not on the same
 node

pcp->lists[] is used to allocate/free page for order 0 page.  While a
list of CPU on Node A could contain page of Node B.

If we put page on the same node to list head and put other pages on list
tail, this would increase the chance to allocate a page on the same node
and free a page on other nodes.

On a 64bit machine, size of per_cpu_pages will not increase because of
the alignment. The new added field *node* will fit in the same cache
line with count,  which minimize the performance impact.

Signed-off-by: Wei Yang <richard.weiyang@xxxxxxxxx>
---
 include/linux/mmzone.h |  1 +
 mm/page_alloc.c        | 30 +++++++++++++++++++++---------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5138efde11ae..27ce071bc99c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -272,6 +272,7 @@ enum zone_watermarks {
 #define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
 
 struct per_cpu_pages {
+	int node;               /* node id of this cpu */
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a398eafbae46..c7a27e461602 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2741,6 +2741,7 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
 static void free_unref_page_commit(struct page *page, unsigned long pfn)
 {
 	struct zone *zone = page_zone(page);
+	int page_node = page_to_nid(page);
 	struct per_cpu_pages *pcp;
 	int migratetype;
 
@@ -2763,7 +2764,14 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
 	}
 
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
-	list_add(&page->lru, &pcp->lists[migratetype]);
+	/*
+	 * If the page has the same node_id as this cpu, put the page at head.
+	 * Otherwise, put at the end.
+	 */
+	if (page_node == pcp->node)
+		list_add(&page->lru, &pcp->lists[migratetype]);
+	else
+		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
@@ -5615,7 +5623,7 @@ static int zone_batchsize(struct zone *zone)
  * exist).
  */
 static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
-		unsigned long batch)
+			   unsigned long batch, int node_id)
 {
        /* start with a fail safe value for batch */
 	pcp->batch = 1;
@@ -5626,12 +5634,14 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
 	smp_wmb();
 
 	pcp->batch = batch;
+	pcp->node = node_id;
 }
 
 /* a companion to pageset_set_high() */
-static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
+static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch,
+			      int node_id)
 {
-	pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
+	pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch), node_id);
 }
 
 static void pageset_init(struct per_cpu_pageset *p)
@@ -5650,7 +5660,7 @@ static void pageset_init(struct per_cpu_pageset *p)
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	pageset_init(p);
-	pageset_set_batch(p, batch);
+	pageset_set_batch(p, batch, 0);
 }
 
 /*
@@ -5658,13 +5668,13 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
  * to the value high for the pageset p.
  */
 static void pageset_set_high(struct per_cpu_pageset *p,
-				unsigned long high)
+				unsigned long high, int node_id)
 {
 	unsigned long batch = max(1UL, high / 4);
 	if ((high / 4) > (PAGE_SHIFT * 8))
 		batch = PAGE_SHIFT * 8;
 
-	pageset_update(&p->pcp, high, batch);
+	pageset_update(&p->pcp, high, batch, node_id);
 }
 
 static void pageset_set_high_and_batch(struct zone *zone,
@@ -5673,9 +5683,11 @@ static void pageset_set_high_and_batch(struct zone *zone,
 	if (percpu_pagelist_fraction)
 		pageset_set_high(pcp,
 			(zone->managed_pages /
-				percpu_pagelist_fraction));
+				percpu_pagelist_fraction),
+			zone->zone_pgdat->node_id);
 	else
-		pageset_set_batch(pcp, zone_batchsize(zone));
+		pageset_set_batch(pcp, zone_batchsize(zone),
+				  zone->zone_pgdat->node_id);
 }
 
 static void __meminit zone_pageset_init(struct zone *zone, int cpu)
-- 
2.15.1

-- 
Wei Yang
Help you, Help me




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux