On Thu, 29 Sep 2011, Alex Shi wrote:
> > Is it really worth it? The higher the value, the more memory is
> > potentially stuck in the per-cpu partial pages.
>
> It is hard to find the best balance. :)
Well, then let's err on the side of smaller memory use for now.
> I tested aim9/netperf; both are said to be related to memory
> allocation, but I found no performance change with/without PCP. Only
> hackbench seems to be sensitive to this. As for aim9, whether with our
> own configuration or with Mel Gorman's aim9 configuration from his
> mmtests, neither shows a clear performance change for PCP slub.
AIM9 tests are usually single-threaded, so I would not expect any
differences. Try AIM7? And concurrent netperfs?
The PCP patch only helps if there is node lock contention, meaning
simultaneous allocations/frees from multiple processors hitting the same
cache.
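
For what it is worth, a minimal userspace sketch of that kind of workload
(hypothetical, not one of the benchmarks discussed here) would be several
threads doing batched small allocations and frees in parallel, which is
roughly the access pattern that makes the per cpu partial lists pay off:

/*
 * Hypothetical concurrent alloc/free stress. Build with:
 *   gcc -O2 -pthread slab-stress.c -o slab-stress
 * Each thread allocates and frees a batch of small objects in a loop,
 * so several CPUs hit the same size class at the same time.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 8
#define NITER    100000
#define BATCH    64
#define OBJSIZE  256

static void *worker(void *arg)
{
	void *obj[BATCH];
	int i, j;

	for (i = 0; i < NITER; i++) {
		for (j = 0; j < BATCH; j++)
			obj[j] = malloc(OBJSIZE);
		for (j = 0; j < BATCH; j++)
			free(obj[j]);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid[NTHREADS];
	int i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);
	puts("done");
	return 0;
}

(hackbench stresses the kernel side of this through the socket/pipe paths;
a userspace malloc loop only illustrates the concurrency pattern, not SLUB
itself.)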
> Checking the kernel function call graph via perf record/perf report,
> the slab functions only show up heavily in the hackbench benchmark.
Then the question arises whether it is worth merging at all if it only
affects this benchmark.
> Above is what I did this week for PCP.
>
> BTW, I will be taking a one-week holiday starting tomorrow. E-mail
> access will be slow.
Have a nice holiday.
diff --git a/mm/slub.c b/mm/slub.c
index 492beab..372f219 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1613,7 +1613,7 @@ static void *get_partial_node(struct kmem_cache *s,
spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t = acquire_slab(s, n, page, object == NULL);
- int available;
+ int available = 1;
if (!t)
continue;
@@ -1623,12 +1623,14 @@ static void *get_partial_node(struct kmem_cache *s,
c->node = page_to_nid(page);
stat(s, ALLOC_FROM_PARTIAL);
object = t;
- available = page->objects - page->inuse;
} else {
page->freelist = t;
available = put_cpu_partial(s, page, 0);
+ if (!available)
+ add_partial(n, page, 0);
+
}
- if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+ if (kmem_cache_debug(s) || !available)
break;
}
@@ -2017,17 +2019,10 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
if (oldpage) {
pobjects = oldpage->pobjects;
pages = oldpage->pages;
- if (drain && pobjects > s->cpu_partial) {
- unsigned long flags;
- /*
- * partial array is full. Move the existing
- * set to the per node partial list.
- */
- local_irq_save(flags);
- unfreeze_partials(s);
- local_irq_restore(flags);
+ if (pobjects > s->cpu_partial) {
pobjects = 0;
- pages = 0;
+ page->frozen = 0;
+ break;
}
}
@@ -2039,7 +2034,10 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
page->next = oldpage;
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
- stat(s, CPU_PARTIAL_FREE);
+
+ if (pobjects)
+ stat(s, CPU_PARTIAL_FREE);
+
return pobjects;
}
@@ -2472,6 +2470,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
new.inuse--;
if ((!new.inuse || !prior) && !was_frozen && !n) {
+
if (!kmem_cache_debug(s) && !prior)
/*
@@ -2482,7 +2481,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
else { /* Needs to be taken off a list */
- n = get_node(s, page_to_nid(page));
+ n = get_node(s, page_to_nid(page));
/*
* Speculatively acquire the list_lock.
* If the cmpxchg does not succeed then we may
@@ -2492,8 +2491,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* other processors updating the list of slabs.
*/
spin_lock_irqsave(&n->list_lock, flags);
-
}
+
}
inuse = new.inuse;
@@ -2503,23 +2502,23 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
"__slab_free"));
if (likely(!n)) {
-
- /*
- * If we just froze the page then put it onto the
- * per cpu partial list.
- */
if (new.frozen && !was_frozen)
- put_cpu_partial(s, page, 1);
+ if (!put_cpu_partial(s, page, 1)) {
+ n = get_node(s, page_to_nid(page));
+ spin_lock_irqsave(&n->list_lock, flags);
+ goto get_lock;
+ }
/*
* The list lock was not taken therefore no list
* activity can be necessary.
*/
- if (was_frozen)
- stat(s, FREE_FROZEN);
- return;
- }
+ if (was_frozen)
+ stat(s, FREE_FROZEN);
+ return;
+ }
+get_lock:
/*
* was_frozen may have been set after we acquired the list_lock in
* an earlier loop. So we need to check it here again.
@@ -2536,7 +2535,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
*/
if (unlikely(!prior)) {
remove_full(s, page);
- add_partial(n, page, 0);
+ add_partial(n, page, 1);
stat(s, FREE_ADD_PARTIAL);
}
}