This makes the size of the shared array (the per node object queue)
configurable. Note that this is a bit problematic and there are likely
unresolved race conditions. The kmem_cache->node[x] pointers become
unstable if interrupts are allowed.

Signed-off-by: Christoph Lameter <cl@xxxxxxxxxxxxxxxxxxxx>

---
 include/linux/slub_def.h |    3 +
 mm/slub.c                |  133 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 116 insertions(+), 20 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h	2010-05-21 13:47:41.000000000 -0500
@@ -81,11 +81,14 @@ struct kmem_cache {
 	struct kmem_cache_order_objects oo;
 	int queue;		/* per cpu queue size */
 	int batch;		/* batch size */
+	int shared;		/* Shared queue size */
 
+#ifndef CONFIG_NUMA
 	/*
 	 * Avoid an extra cache line for UP, SMP and for the node local to
 	 * struct kmem_cache.
 	 */
 	struct kmem_cache_node local_node;
+#endif
 
 	/* Allocation and freeing of slabs */
 	struct kmem_cache_order_objects max;
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2010-05-21 13:17:14.000000000 -0500
+++ linux-2.6/mm/slub.c	2010-05-21 13:48:01.000000000 -0500
@@ -1754,7 +1754,7 @@ redo:
 		int d;
 
 		spin_lock(&n->shared_lock);
-		d = min(min(s->batch, BOOT_QUEUE_SIZE), n->objects);
+		d = min(min(s->batch, s->shared), n->objects);
 		if (d > 0) {
 			memcpy(c->object + c->objects,
 				n->object + n->objects - d,
@@ -1864,6 +1864,7 @@ void *kmem_cache_alloc_node(struct kmem_
 	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
+
 #endif
 
 #ifdef CONFIG_TRACING
@@ -2176,10 +2177,7 @@ static void free_kmem_cache_nodes(struct
 	int node;
 
 	for_each_node_state(node, N_NORMAL_MEMORY) {
-		struct kmem_cache_node *n = s->node[node];
-
-		if (n && n != &s->local_node)
-			kfree(n);
+		kfree(s->node[node]);
 		s->node[node] = NULL;
 	}
 }
@@ -2197,27 +2195,96 @@ static int init_kmem_cache_nodes(struct
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n;
 
-		if (local_node == node)
-			n = &s->local_node;
-		else {
-			if (slab_state == DOWN) {
-				early_kmem_cache_node_alloc(gfpflags, node);
-				continue;
-			}
-			n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
-								node);
-
-			if (!n) {
-				free_kmem_cache_nodes(s);
-				return 0;
-			}
+		if (slab_state == DOWN) {
+			early_kmem_cache_node_alloc(gfpflags, node);
+			continue;
+		}
+		n = kmalloc_node(sizeof(struct kmem_cache_node), gfpflags,
+								node);
+		if (!n) {
+			free_kmem_cache_nodes(s);
+			return 0;
 		}
 		s->node[node] = n;
 		init_kmem_cache_node(n, s);
 	}
 	return 1;
 }
+
+static void resize_shared_queue(struct kmem_cache *s, int shared)
+{
+
+	if (is_kmalloc_cache(s)) {
+		if (shared < BOOT_QUEUE_SIZE) {
+			s->shared = shared;
+		} else {
+			/* More than max. Go to max allowed */
+			s->queue = BOOT_QUEUE_SIZE;
+			s->batch = BOOT_BATCH_SIZE;
+		}
+	} else {
+		int node;
+
+		/* Create the new cpu queue and then free the old one */
+		down_write(&slub_lock);
+
+		/* We can only shrink the queue here since the new
+		 * queue size may be smaller and there may be concurrent
+		 * slab operations. The update of the queue must be seen
+		 * before the change of the location of the percpu queue.
+		 *
+		 * Note that the queue may contain more objects than the
+		 * queue size after this operation.
+		 */
+		if (shared < s->shared) {
+			s->shared = shared;
+			barrier();
+		}
+
+
+		/* Serialization has not been worked out yet */
+		for_each_online_node(node) {
+			struct kmem_cache_node *n = get_node(s, node);
+			struct kmem_cache_node *nn =
+				kmalloc_node(sizeof(struct kmem_cache_node),
+						GFP_KERNEL, node);
+
+			init_kmem_cache_node(nn, s);
+			s->node[node] = nn;
+
+			spin_lock(&nn->list_lock);
+			list_move(&n->partial, &nn->partial);
+#ifdef CONFIG_SLUB_DEBUG
+			list_move(&n->full, &nn->full);
+#endif
+			spin_unlock(&nn->list_lock);
+
+			nn->nr_partial = n->nr_partial;
+#ifdef CONFIG_SLUB_DEBUG
+			nn->nr_slabs = n->nr_slabs;
+			nn->total_objects = n->total_objects;
+#endif
+
+			spin_lock(&nn->shared_lock);
+			nn->objects = n->objects;
+			memcpy(&nn->object, n->object, nn->objects * sizeof(void *));
+			spin_unlock(&nn->shared_lock);
+
+			kfree(n);
+		}
+		/*
+		 * If the queue needs to be extended then we deferred
+		 * the update until now when the larger sized queue
+		 * has been allocated and is working.
+		 */
+		if (shared > s->shared)
+			s->shared = shared;
+
+		up_write(&slub_lock);
+	}
+}
+
 #else
 static void free_kmem_cache_nodes(struct kmem_cache *s)
 {
@@ -3989,6 +4056,31 @@ static ssize_t cpu_queue_size_store(stru
 }
 SLAB_ATTR(cpu_queue_size);
 
+#ifdef CONFIG_NUMA
+static ssize_t shared_queue_size_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%u\n", s->shared);
+}
+
+static ssize_t shared_queue_size_store(struct kmem_cache *s,
+			 const char *buf, size_t length)
+{
+	unsigned long queue;
+	int err;
+
+	err = strict_strtoul(buf, 10, &queue);
+	if (err)
+		return err;
+
+	if (queue > 10000 || queue < s->batch)
+		return -EINVAL;
+
+	resize_shared_queue(s, queue);
+	return length;
+}
+SLAB_ATTR(shared_queue_size);
+#endif
+
 static ssize_t cpu_batch_size_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%u\n", s->batch);
@@ -4388,6 +4480,7 @@ static struct attribute *slab_attrs[] =
 	&cache_dma_attr.attr,
 #endif
 #ifdef CONFIG_NUMA
+	&shared_queue_size_attr.attr,
 	&remote_node_defrag_ratio_attr.attr,
 #endif
 #ifdef CONFIG_SLUB_STATS
@@ -4720,7 +4813,7 @@ static int s_show(struct seq_file *m, vo
 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
 		   nr_objs, s->size, oo_objects(s->oo), (1 << oo_order(s->oo)));
-	seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, 0);
+	seq_printf(m, " : tunables %4u %4u %4u", s->queue, s->batch, s->shared);
 	seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs, 0UL);
 	seq_putc(m, '\n');
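
For reference, on a NUMA build the new tunable is exposed through sysfs as
/sys/kernel/slab/<cache>/shared_queue_size and is reported as the third
"tunables" column of the slabinfo output (see the s_show() hunk above). A
minimal userspace sketch, assuming this patch is applied; the "dentry"
cache and the value 128 are purely illustrative:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/slab/dentry/shared_queue_size", "w");
	char line[512];

	if (!f) {
		perror("shared_queue_size");
		return 1;
	}
	/* The store hook rejects values below the batch size or above 10000 */
	fprintf(f, "%d\n", 128);
	fclose(f);

	f = fopen("/proc/slabinfo", "r");
	if (!f) {
		perror("/proc/slabinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "dentry ", 7))
			/* "... : tunables <queue> <batch> <shared> ..." */
			fputs(line, stdout);
	fclose(f);
	return 0;
}

Reads of the same sysfs file go through shared_queue_size_show() and return
the currently active value.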
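
The "kmem_cache->node[x] pointers become unstable" caveat in the changelog
corresponds to the publish-new/free-old pattern in resize_shared_queue(): a
new kmem_cache_node is installed and the old one kfree()d while other
contexts may still be using the pointer they loaded earlier. The following
userspace analogue is only an illustration of that pattern (it is not
kernel code, and all names are made up); old objects are merely marked
retired instead of freed so the hazard can be counted without undefined
behaviour:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct kmem_cache_node; "retired" marks the point where
 * the kernel would have called kfree(). */
struct node {
	atomic_int retired;
};

static _Atomic(struct node *) current_node;	/* stand-in for s->node[x] */
static atomic_int hazards;

static void *reader(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		struct node *n = atomic_load(&current_node);
		/* ... the kernel would now lock n->shared_lock and use n ... */
		if (atomic_load(&n->retired))
			atomic_fetch_add(&hazards, 1);	/* would be a use-after-free */
	}
	return NULL;
}

static void *resizer(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000; i++) {
		struct node *nn = calloc(1, sizeof(*nn));
		struct node *old = atomic_exchange(&current_node, nn);

		atomic_store(&old->retired, 1);	/* the kernel would kfree(old) here */
	}
	return NULL;	/* old nodes leaked on purpose to stay well defined */
}

int main(void)
{
	struct node *first = calloc(1, sizeof(*first));
	pthread_t r, w;

	atomic_store(&current_node, first);
	pthread_create(&r, NULL, reader, NULL);
	pthread_create(&w, NULL, resizer, NULL);
	pthread_join(r, NULL);
	pthread_join(w, NULL);
	printf("reader saw %d retired nodes still in use\n", atomic_load(&hazards));
	return 0;
}

In the kernel the usual answers would be deferred (RCU-style) freeing or
excluding all other users of the node pointer around the swap, which is
presumably what the "Serialization has not been worked out yet" comment
refers to.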