On 2023/12/5 03:34, Vlastimil Babka wrote: > Currently we have a single function slab_free() handling both single > object freeing and bulk freeing with necessary hooks, the latter case > requiring slab_free_freelist_hook(). It should be however better to > distinguish the two use cases for the following reasons: > > - code simpler to follow for the single object case > > - better code generation - although inlining should eliminate the > slab_free_freelist_hook() for single object freeing in case no > debugging options are enabled, it seems it's not perfect. When e.g. > KASAN is enabled, we're imposing additional unnecessary overhead for > single object freeing. > > - preparation to add percpu array caches in near future > > Therefore, simplify slab_free() for the single object case by dropping > unnecessary parameters and calling only slab_free_hook() instead of > slab_free_freelist_hook(). Rename the bulk variant to slab_free_bulk() > and adjust callers accordingly. > > While at it, flip (and document) slab_free_hook() return value so that > it returns true when the freeing can proceed, which matches the logic of > slab_free_freelist_hook() and is not confusingly the opposite. > > Additionally we can simplify a bit by changing the tail parameter of > do_slab_free() when freeing a single object - instead of NULL we can set > it equal to head. > > bloat-o-meter shows small code reduction with a .config that has KASAN > etc disabled: > > add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-118 (-118) > Function old new delta > kmem_cache_alloc_bulk 1203 1196 -7 > kmem_cache_free 861 835 -26 > __kmem_cache_free 741 704 -37 > kmem_cache_free_bulk 911 863 -48 > > Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx> Looks good to me. Reviewed-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx> Thanks! > --- > mm/slub.c | 59 +++++++++++++++++++++++++++++++++++------------------------ > 1 file changed, 35 insertions(+), 24 deletions(-) > > diff --git a/mm/slub.c b/mm/slub.c > index 0742564c4538..ed2fa92e914c 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -2037,9 +2037,12 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, > /* > * Hooks for other subsystems that check memory allocations. In a typical > * production configuration these hooks all should produce no code at all. > + * > + * Returns true if freeing of the object can proceed, false if its reuse > + * was delayed by KASAN quarantine. > */ > -static __always_inline bool slab_free_hook(struct kmem_cache *s, > - void *x, bool init) > +static __always_inline > +bool slab_free_hook(struct kmem_cache *s, void *x, bool init) > { > kmemleak_free_recursive(x, s->flags); > kmsan_slab_free(s, x); > @@ -2072,7 +2075,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, > s->size - s->inuse - rsize); > } > /* KASAN might put x into memory quarantine, delaying its reuse. */ > - return kasan_slab_free(s, x, init); > + return !kasan_slab_free(s, x, init); > } > > static inline bool slab_free_freelist_hook(struct kmem_cache *s, > @@ -2082,7 +2085,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, > > void *object; > void *next = *head; > - void *old_tail = *tail ? *tail : *head; > + void *old_tail = *tail; > > if (is_kfence_address(next)) { > slab_free_hook(s, next, false); > @@ -2098,8 +2101,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, > next = get_freepointer(s, object); > > /* If object's reuse doesn't have to be delayed */ > - if (likely(!slab_free_hook(s, object, > - slab_want_init_on_free(s)))) { > + if (likely(slab_free_hook(s, object, > + slab_want_init_on_free(s)))) { > /* Move object to the new freelist */ > set_freepointer(s, object, *head); > *head = object; > @@ -2114,9 +2117,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, > } > } while (object != old_tail); > > - if (*head == *tail) > - *tail = NULL; > - > return *head != NULL; > } > > @@ -4227,7 +4227,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s, > struct slab *slab, void *head, void *tail, > int cnt, unsigned long addr) > { > - void *tail_obj = tail ? : head; > struct kmem_cache_cpu *c; > unsigned long tid; > void **freelist; > @@ -4246,14 +4245,14 @@ static __always_inline void do_slab_free(struct kmem_cache *s, > barrier(); > > if (unlikely(slab != c->slab)) { > - __slab_free(s, slab, head, tail_obj, cnt, addr); > + __slab_free(s, slab, head, tail, cnt, addr); > return; > } > > if (USE_LOCKLESS_FAST_PATH()) { > freelist = READ_ONCE(c->freelist); > > - set_freepointer(s, tail_obj, freelist); > + set_freepointer(s, tail, freelist); > > if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) { > note_cmpxchg_failure("slab_free", s, tid); > @@ -4270,7 +4269,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s, > tid = c->tid; > freelist = c->freelist; > > - set_freepointer(s, tail_obj, freelist); > + set_freepointer(s, tail, freelist); > c->freelist = head; > c->tid = next_tid(tid); > > @@ -4283,15 +4282,27 @@ static void do_slab_free(struct kmem_cache *s, > struct slab *slab, void *head, void *tail, > int cnt, unsigned long addr) > { > - void *tail_obj = tail ? : head; > - > - __slab_free(s, slab, head, tail_obj, cnt, addr); > + __slab_free(s, slab, head, tail, cnt, addr); > } > #endif /* CONFIG_SLUB_TINY */ > > -static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, > - void *head, void *tail, void **p, int cnt, > - unsigned long addr) > +static __fastpath_inline > +void slab_free(struct kmem_cache *s, struct slab *slab, void *object, > + unsigned long addr) > +{ > + bool init; > + > + memcg_slab_free_hook(s, slab, &object, 1); > + > + init = !is_kfence_address(object) && slab_want_init_on_free(s); > + > + if (likely(slab_free_hook(s, object, init))) > + do_slab_free(s, slab, object, object, 1, addr); > +} > + > +static __fastpath_inline > +void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head, > + void *tail, void **p, int cnt, unsigned long addr) > { > memcg_slab_free_hook(s, slab, p, cnt); > /* > @@ -4305,7 +4316,7 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, > #ifdef CONFIG_KASAN_GENERIC > void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) > { > - do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr); > + do_slab_free(cache, virt_to_slab(x), x, x, 1, addr); > } > #endif > > @@ -4349,7 +4360,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x) > if (!s) > return; > trace_kmem_cache_free(_RET_IP_, x, s); > - slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_); > + slab_free(s, virt_to_slab(x), x, _RET_IP_); > } > EXPORT_SYMBOL(kmem_cache_free); > > @@ -4395,7 +4406,7 @@ void kfree(const void *object) > > slab = folio_slab(folio); > s = slab->slab_cache; > - slab_free(s, slab, x, NULL, &x, 1, _RET_IP_); > + slab_free(s, slab, x, _RET_IP_); > } > EXPORT_SYMBOL(kfree); > > @@ -4512,8 +4523,8 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) > if (!df.slab) > continue; > > - slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt, > - _RET_IP_); > + slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size], > + df.cnt, _RET_IP_); > } while (likely(size)); > } > EXPORT_SYMBOL(kmem_cache_free_bulk); >