Re: [PATCH v2 3/3] mm/slub: Use percpu partial free counter

Christoph Lameter <cl@xxxxxxxxx> · Tue, 2 Mar 2021 10:14:53 +0100 (CET)

On Mon, 10 Aug 2020, Xunlei Pang wrote:

>
> diff --git a/mm/slab.h b/mm/slab.h
> index c85e2fa..a709a70 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -616,7 +616,7 @@ struct kmem_cache_node {
>  #ifdef CONFIG_SLUB
>  	unsigned long nr_partial;
>  	struct list_head partial;
> -	atomic_long_t partial_free_objs;
> +	atomic_long_t __percpu *partial_free_objs;

A percpu counter never needs to be atomic. Just declare it as an unsigned long
and use this_cpu operations on it. That should cut down further on the overhead.

> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1775,11 +1775,21 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
>  /*
>   * Management of partially allocated slabs.
>   */
> +static inline long get_partial_free(struct kmem_cache_node *n)
> +{
> +	long nr = 0;
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu)
> +		nr += atomic_long_read(per_cpu_ptr(n->partial_free_objs, cpu));

Use this_cpu_read(*n->partial_free_objs) here.

>  static inline void
>  __update_partial_free(struct kmem_cache_node *n, long delta)
>  {
> -	atomic_long_add(delta, &n->partial_free_objs);
> +	atomic_long_add(delta, this_cpu_ptr(n->partial_free_objs));

Use this_cpu_add() here, and so on for the remaining atomic operations on
this counter.