On Thu, 29 Sep 2011, Alex Shi wrote:
> > Is it really worth it? The higher the value, the more memory is
> > potentially stuck in the per-cpu partial pages.
>
> It is hard to find the best balance. :)
Well, then let's err on the side of smaller memory use for now.
> I tested aim9/netperf; both are said to be related to memory
> allocation, but I found no performance change with/without PCP. Only
> hackbench seems to be sensitive to this. As for aim9, whether with our
> own configuration or with Mel Gorman's aim9 configuration from his
> mmtests, neither shows a clear performance change for PCP slub.
AIM9 tests are usually single-threaded, so I would not expect any
differences. Try AIM7? And concurrent netperfs?
The PCP patch only helps if there is node lock contention, meaning
simultaneous allocations/frees from multiple processors hitting the same
cache.
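
For what it is worth, a minimal userspace sketch of that kind of workload
(hypothetical, not one of the benchmarks discussed here) would be several
threads doing batched small allocations and frees in parallel, which is
roughly the access pattern that makes the per cpu partial lists pay off:

/*
 * Hypothetical concurrent alloc/free stress. Build with:
 *   gcc -O2 -pthread slab-stress.c -o slab-stress
 * Each thread allocates and frees a batch of small objects in a loop,
 * so several CPUs hit the same size class at the same time.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 8
#define NITER    100000
#define BATCH    64
#define OBJSIZE  256

static void *worker(void *arg)
{
	void *obj[BATCH];
	int i, j;

	for (i = 0; i < NITER; i++) {
		for (j = 0; j < BATCH; j++)
			obj[j] = malloc(OBJSIZE);
		for (j = 0; j < BATCH; j++)
			free(obj[j]);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid[NTHREADS];
	int i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);
	puts("done");
	return 0;
}

(hackbench stresses the kernel side of this through the socket/pipe paths;
a userspace malloc loop only illustrates the concurrency pattern, not SLUB
itself.)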
> Checking the kernel function call graph via perf record/perf report,
> the slab functions only show up heavily in the hackbench benchmark.
Then the question arises whether it is worth merging at all if it only
affects this benchmark.
> Above is what I did this week for PCP.
>
> BTW, I will be taking a one-week holiday starting tomorrow. E-mail
> access will be slow.
Have a nice holiday.
diff --git a/mm/slub.c b/mm/slub.c
index 492beab..372f219 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1613,7 +1613,7 @@ static void *get_partial_node(struct kmem_cache *s,
spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t = acquire_slab(s, n, page, object == NULL);
- int available;
+ int available = 1;
if (!t)
continue;
@@ -1623,12 +1623,14 @@ static void *get_partial_node(struct kmem_cache *s,
c->node = page_to_nid(page);
stat(s, ALLOC_FROM_PARTIAL);
object = t;
- available = page->objects - page->inuse;
} else {
page->freelist = t;
available = put_cpu_partial(s, page, 0);
+ if (!available)
+ add_partial(n, page, 0);
+
}
- if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+ if (kmem_cache_debug(s) || !available)
break;
}
@@ -2017,17 +2019,10 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
if (oldpage) {
pobjects = oldpage->pobjects;
pages = oldpage->pages;
- if (drain && pobjects > s->cpu_partial) {
- unsigned long flags;
- /*
- * partial array is full. Move the existing
- * set to the per node partial list.
- */
- local_irq_save(flags);
- unfreeze_partials(s);
- local_irq_restore(flags);
+ if (pobjects > s->cpu_partial) {
pobjects = 0;
- pages = 0;
+ page->frozen = 0;
+ break;
}
}
@@ -2039,7 +2034,10 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
page->next = oldpage;
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
- stat(s, CPU_PARTIAL_FREE);
+
+ if (pobjects)
+ stat(s, CPU_PARTIAL_FREE);
+
return pobjects;
}
@@ -2472,6 +2470,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
new.inuse--;
if ((!new.inuse || !prior) && !was_frozen && !n) {
+
if (!kmem_cache_debug(s) && !prior)
/*
@@ -2482,7 +2481,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
else { /* Needs to be taken off a list */
- n = get_node(s, page_to_nid(page));
+ n = get_node(s, page_to_nid(page));
/*
* Speculatively acquire the list_lock.
* If the cmpxchg does not succeed then we may
@@ -2492,8 +2491,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* other processors updating the list of slabs.
*/
spin_lock_irqsave(&n->list_lock, flags);
-
}
+
}
inuse = new.inuse;
@@ -2503,23 +2502,23 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
"__slab_free"));
if (likely(!n)) {
-
- /*
- * If we just froze the page then put it onto the
- * per cpu partial list.
- */
if (new.frozen && !was_frozen)
- put_cpu_partial(s, page, 1);
+ if (!put_cpu_partial(s, page, 1)) {
+ n = get_node(s, page_to_nid(page));
+ spin_lock_irqsave(&n->list_lock, flags);
+ goto get_lock;
+ }
/*
* The list lock was not taken therefore no list
* activity can be necessary.
*/
- if (was_frozen)
- stat(s, FREE_FROZEN);
- return;
- }
+ if (was_frozen)
+ stat(s, FREE_FROZEN);
+ return;
+ }
+get_lock:
/*
* was_frozen may have been set after we acquired the list_lock in
* an earlier loop. So we need to check it here again.
@@ -2536,7 +2535,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
*/
if (unlikely(!prior)) {
remove_full(s, page);
- add_partial(n, page, 0);
+ add_partial(n, page, 1);
stat(s, FREE_ADD_PARTIAL);
}
}