This is in a draft state. For off-node allocations, leave the per-cpu
queue alone and go directly to the shared cache of the remote node.

Signed-off-by: Christoph Lameter <cl@xxxxxxxxxxxxxxxxxxxx>

---
 include/linux/slub_def.h |    1 
 mm/slub.c                |  184 ++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 142 insertions(+), 43 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2010-05-21 15:30:47.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h	2010-05-21 15:34:45.000000000 -0500
@@ -42,7 +42,6 @@ struct kmem_cache_cpu {
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
 	int objects;		/* Number of objects available */
-	int node;		/* The node of the page (or -1 for debug) */
 	void *object[BOOT_QUEUE_SIZE];	/* List of objects */
 };

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2010-05-21 15:30:47.000000000 -0500
+++ linux-2.6/mm/slub.c	2010-05-21 15:37:04.000000000 -0500
@@ -1616,19 +1616,6 @@ static void resize_cpu_queue(struct kmem
 	}
 }
 
-/*
- * Check if the objects in a per cpu structure fit numa
- * locality expectations.
- */
-static inline int node_match(struct kmem_cache_cpu *c, int node)
-{
-#ifdef CONFIG_NUMA
-	if (node != -1 && c->node != node)
-		return 0;
-#endif
-	return 1;
-}
-
 static unsigned long count_partial(struct kmem_cache_node *n,
 					int (*get_count)(struct page *))
 {
@@ -1718,9 +1705,9 @@ void retrieve_objects(struct kmem_cache
 	}
 }
 
+#ifdef CONFIG_NUMA
 static inline int find_numa_node(struct kmem_cache *s, int selected_node)
 {
-#ifdef CONFIG_NUMA
 	if (s->flags & SLAB_MEM_SPREAD &&
 			!in_interrupt() &&
 			selected_node == SLAB_NODE_UNSPECIFIED) {
@@ -1731,10 +1718,113 @@ static inline int find_numa_node(struct
 		if (current->mempolicy)
 			return slab_node(current->mempolicy);
 	}
-#endif
 	return selected_node;
 }
 
+/*
+ * Try to allocate a partial slab from a specific node.
+ */
+static struct page *__get_partial_node(struct kmem_cache_node *n)
+{
+	struct page *page;
+
+	if (!n->nr_partial)
+		return NULL;
+
+	list_for_each_entry(page, &n->partial, lru)
+		if (lock_and_freeze_slab(n, page))
+			goto out;
+	page = NULL;
+out:
+	return page;
+}
+
+/*
+ * Allocate an object directly from the shared cache of the
+ * indicated remote node, leaving the per cpu queue untouched.
+ */
+void *off_node_alloc(struct kmem_cache *s, int node, gfp_t gfpflags)
+{
+	void *object = NULL;
+	struct kmem_cache_node *n = get_node(s, node);
+
+	spin_lock(&n->shared_lock);
+
+	while (!object) {
+		/* Direct allocation from remote shared cache */
+		if (n->objects) {
+#if 0
+			/* Take a hot object from the remote shared cache */
+			object = n->object[--n->objects];
+#else
+			/* Take a cold object from the remote shared cache */
+			object = n->object[0];
+			n->objects--;
+			memmove(n->object, n->object + 1,
+					n->objects * sizeof(void *));
+#endif
+			break;
+		}
+
+		while (n->objects < s->batch) {
+			struct page *new;
+			int d;
+
+			/*
+			 * Should be getting a cold remote page here,
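+			 * so that cache hot data on the remote node
+			 * is not disturbed. __get_partial_node()
+			 * returns the first slab on the partial list,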
+			 * which is still hot.
+			 */
+			new = __get_partial_node(n);
+			if (unlikely(!new)) {
+
+				spin_unlock(&n->shared_lock);
+
+				if (gfpflags & __GFP_WAIT)
+					local_irq_enable();
+
+				new = new_slab(s, gfpflags, node);
+
+				if (gfpflags & __GFP_WAIT)
+					local_irq_disable();
+
+				spin_lock(&n->shared_lock);
+
+				if (!new)
+					goto out;
+
+				stat(s, ALLOC_SLAB);
+				slab_lock(new);
+			} else
+				stat(s, ALLOC_FROM_PARTIAL);
+
+			d = min(s->batch - n->objects, available(new));
+			retrieve_objects(s, new, n->object + n->objects, d);
+			n->objects += d;
+
+			if (!all_objects_used(new))
+				add_partial(get_node(s, page_to_nid(new)), new, 1);
+			else
+				add_full(s, get_node(s, page_to_nid(new)), new);
+
+			slab_unlock(new);
+		}
+	}
+out:
+	spin_unlock(&n->shared_lock);
+	return object;
+}
+
+/*
+ * Check if an allocation for the indicated node can be
+ * satisfied from the local node.
+ */
+static inline int node_local(int node)
+{
+	if (node != -1 && numa_node_id() != node)
+		return 0;
+	return 1;
+}
+
+#else
+static inline int find_numa_node(struct kmem_cache *s, int selected_node) { return selected_node; }
+static inline void *off_node_alloc(struct kmem_cache *s, int node, gfp_t gfpflags) { return NULL; }
+static inline int node_local(int node) { return 1; }
+#endif
 
 static void *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, unsigned long addr)
@@ -1753,36 +1843,41 @@ redo:
 
 	node = find_numa_node(s, node);
 	local_irq_save(flags);
 	c = __this_cpu_ptr(s->cpu_slab);
-	if (unlikely(!c->objects || !node_match(c, node))) {
+	if (unlikely(!c->objects || !node_local(node))) {
+		struct kmem_cache_node *n;
 
 		gfpflags &= gfp_allowed_mask;
 
-		if (unlikely(!node_match(c, node))) {
-			flush_cpu_objects(s, c);
-			c->node = node;
-		} else {
-			struct kmem_cache_node *n = get_node(s, c->node);
+		if (unlikely(!node_local(node))) {
+			object = off_node_alloc(s, node, gfpflags);
+			if (!object)
+				goto oom;
+			goto got_object;
+		}
 
-			/*
-			 * Node specified is matching the stuff that we cache,
-			 * so we could retrieve objects from the shared cache
-			 * of the indicated node if there would be anything
-			 * there.
-			 */
-			if (n->objects) {
-				int d;
+		n = get_node(s, numa_node_id());
 
-				spin_lock(&n->shared_lock);
-				d = min(min(s->batch, s->shared), n->objects);
-				if (d > 0) {
-					memcpy(c->object + c->objects,
-						n->object + n->objects - d,
-						d * sizeof(void *));
-					n->objects -= d;
-					c->objects += d;
-				}
-				spin_unlock(&n->shared_lock);
+		/*
+		 * The requested node is the local node, so we can
+		 * refill the per cpu queue from the local shared
+		 * cache if it holds any objects.
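+		 * Objects are taken from the top of the shared
+		 * cache, so the most recently freed and therefore
+		 * likely cache hot objects are handed out first.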
+		 */
+		if (n->objects) {
+			int d;
+
+			spin_lock(&n->shared_lock);
+			d = min(min(s->batch, s->shared), n->objects);
+			if (d > 0) {
+				memcpy(c->object + c->objects,
+					n->object + n->objects - d,
+					d * sizeof(void *));
+				n->objects -= d;
+				c->objects += d;
 			}
+			spin_unlock(&n->shared_lock);
 		}
 
 		while (c->objects < s->batch) {
@@ -1833,6 +1928,8 @@ redo:
 
 	object = c->object[--c->objects];
 
+got_object:
+
 	if (unlikely(debug_on(s))) {
 		if (!alloc_debug_processing(s, object, addr))
 			goto redo;
@@ -1962,8 +2059,10 @@ static void slab_free(struct kmem_cache
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(object, s->objsize);
 
+#ifdef CONFIG_NUMA
 	if (numa_off_node_free(s, x))
 		goto out;
+#endif
 
 	if (unlikely(c->objects >= s->queue)) {
 
@@ -3941,8 +4040,9 @@ static ssize_t show_slab_objects(struct
 	for_each_possible_cpu(cpu) {
 		struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+		int node = cpu_to_node(cpu);
 
-		if (!c || c->node < 0)
+		if (!c)
 			continue;
 
 		if (c->objects) {
@@ -3954,9 +4054,9 @@ static ssize_t show_slab_objects(struct
 			x = 1;
 		total += x;
-		nodes[c->node] += x;
+		nodes[node] += x;
 		}
-		per_cpu[c->node]++;
+		per_cpu[node]++;
 	}
 }
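
A note on the #if 0 block in off_node_alloc() above: the two variants
trade an O(1) pop of the most recently freed (cache hot) object against
an O(n) memmove() that hands out the oldest (cache cold) object. A
minimal standalone sketch of that trade-off (plain userspace C, not
kernel code; the struct and names are made up for the illustration):

#include <stdio.h>
#include <string.h>

#define SHARED_CACHE_SIZE 8

/* Toy stand-in for the per node shared cache. */
struct shared_cache {
	int objects;				/* objects currently cached */
	void *object[SHARED_CACHE_SIZE];	/* most recently freed on top */
};

/* LIFO: take the most recently freed (hot) object, O(1). */
static void *take_hot(struct shared_cache *n)
{
	return n->object[--n->objects];
}

/* FIFO: take the oldest (cold) object, O(n) to close the gap. */
static void *take_cold(struct shared_cache *n)
{
	void *object = n->object[0];

	n->objects--;
	/* memmove, not memcpy: source and destination overlap */
	memmove(n->object, n->object + 1, n->objects * sizeof(void *));
	return object;
}

int main(void)
{
	static int slot[4];
	struct shared_cache n = { 0 };
	int i;

	for (i = 0; i < 4; i++)		/* freed in order 0, 1, 2, 3 */
		n.object[n.objects++] = &slot[i];

	printf("hot  -> slot %td\n", (int *)take_hot(&n) - slot);
	printf("cold -> slot %td\n", (int *)take_cold(&n) - slot);
	return 0;
}

This is also why the cold path in the patch uses memmove() rather than
memcpy(): the source and destination regions overlap by all but one
element.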