On Fri, Apr 22, 2022 at 01:18:30PM -0700, Peter Collingbourne wrote:
> When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum
> slab alignment to 16. This happens even if MTE is not supported in
> hardware or disabled via kasan=off, which creates an unnecessary
> memory overhead in those cases. Eliminate this overhead by making
> the minimum slab alignment a runtime property and only aligning to
> 16 if KASAN is enabled at runtime.
>
> On a DragonBoard 845c (non-MTE hardware) with a kernel built with
> CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android
> boot I see the following Slab measurements in /proc/meminfo (median
> of 3 reboots):
>
> Before: 169020 kB
> After:  167304 kB
>
> Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead
> Signed-off-by: Peter Collingbourne <pcc@xxxxxxxxxx>
> Reviewed-by: Andrey Konovalov <andreyknvl@xxxxxxxxx>
> ---
> v3:
> - go back to ARCH_SLAB_MINALIGN
> - revert changes to fs/binfmt_flat.c
> - update arch_slab_minalign() comment to say that it must be a power of two
>
> v2:
> - use max instead of max_t in flat_stack_align()
>
>  arch/arm64/include/asm/cache.h | 17 ++++++++++++-----
>  include/linux/slab.h           | 12 ++++++++++++
>  mm/slab.c                      |  7 +++----
>  mm/slab_common.c               |  3 +--
>  mm/slob.c                      |  6 +++---
>  5 files changed, 31 insertions(+), 14 deletions(-)
>
> diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
> index a074459f8f2f..22b22dc1b1b5 100644
> --- a/arch/arm64/include/asm/cache.h
> +++ b/arch/arm64/include/asm/cache.h
> @@ -6,6 +6,7 @@
>  #define __ASM_CACHE_H
>  
>  #include <asm/cputype.h>
> +#include <asm/mte-def.h>
>  
>  #define CTR_L1IP_SHIFT		14
>  #define CTR_L1IP_MASK		3
> @@ -49,16 +50,22 @@
>   */
>  #define ARCH_DMA_MINALIGN	(128)
>  
> +#ifndef __ASSEMBLY__
> +
> +#include <linux/bitops.h>
> +#include <linux/kasan-enabled.h>
> +
>  #ifdef CONFIG_KASAN_SW_TAGS
>  #define ARCH_SLAB_MINALIGN	(1ULL << KASAN_SHADOW_SCALE_SHIFT)
>  #elif defined(CONFIG_KASAN_HW_TAGS)
> -#define ARCH_SLAB_MINALIGN	MTE_GRANULE_SIZE
> +static inline size_t arch_slab_minalign(void)
> +{
> +	return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
> +					 __alignof__(unsigned long long);
> +}
> +#define arch_slab_minalign() arch_slab_minalign()
>  #endif
>  
> -#ifndef __ASSEMBLY__
> -
> -#include <linux/bitops.h>
> -
>  #define ICACHEF_ALIASING	0
>  #define ICACHEF_VPIPT		1
>  extern unsigned long __icache_flags;
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 373b3ef99f4e..2c7190db4cc0 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -209,6 +209,18 @@ void kmem_dump_obj(void *object);
>  #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
>  #endif
>  
> +/*
> + * Arches can define this function if they want to decide the minimum slab
> + * alignment at runtime. The value returned by the function must be a power
> + * of two and >= ARCH_SLAB_MINALIGN.
> + */
> +#ifndef arch_slab_minalign
> +static inline size_t arch_slab_minalign(void)
> +{
> +	return ARCH_SLAB_MINALIGN;
> +}
> +#endif
> +
>  /*
>   * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
>   * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
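
A note for other readers on how this resolves at runtime: with the generic
fallback above, the effective minimum alignment is now picked when a cache is
set up rather than fixed at compile time. A minimal sketch of that resolution
(illustration only, not code from this patch; it mirrors calculate_alignment()
in the mm/slab_common.c hunk further down, and example_clamp_align() is a
made-up name):

/*
 * Illustration only: clamp a requested cache alignment against the
 * runtime minimum, the way calculate_alignment() does below.
 */
static unsigned int example_clamp_align(unsigned int align)
{
	/*
	 * On arm64 with CONFIG_KASAN_HW_TAGS, the override above returns
	 * MTE_GRANULE_SIZE (16) only when hardware tag-based KASAN is
	 * actually enabled at runtime, and __alignof__(unsigned long long)
	 * (8) otherwise -- which is where the Slab saving in the commit
	 * message comes from. Other configurations get the compile-time
	 * ARCH_SLAB_MINALIGN fallback.
	 */
	align = max_t(size_t, align, arch_slab_minalign());

	return ALIGN(align, sizeof(void *));
}
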
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..97b756976c8b 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3009,10 +3009,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
>  	objp += obj_offset(cachep);
>  	if (cachep->ctor && cachep->flags & SLAB_POISON)
>  		cachep->ctor(objp);
> -	if (ARCH_SLAB_MINALIGN &&
> -	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
> -		pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n",
> -		       objp, (int)ARCH_SLAB_MINALIGN);
> +	if ((unsigned long)objp & (arch_slab_minalign() - 1)) {
> +		pr_err("0x%px: not aligned to arch_slab_minalign()=%d\n", objp,
> +		       (int)arch_slab_minalign());
>  	}
>  	return objp;
>  }
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..33cc49810a54 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -154,8 +154,7 @@ static unsigned int calculate_alignment(slab_flags_t flags,
>  		align = max(align, ralign);
>  	}
>  
> -	if (align < ARCH_SLAB_MINALIGN)
> -		align = ARCH_SLAB_MINALIGN;
> +	align = max_t(size_t, align, arch_slab_minalign());
>  
>  	return ALIGN(align, sizeof(void *));
>  }
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..3bd2669bd690 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -478,7 +478,7 @@ static __always_inline void *
>  __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  {
>  	unsigned int *m;
> -	int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
> +	int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
>  	void *ret;
>  
>  	gfp &= gfp_allowed_mask;
> @@ -555,7 +555,7 @@ void kfree(const void *block)
>  
>  	sp = virt_to_folio(block);
>  	if (folio_test_slab(sp)) {
> -		int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
> +		int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
>  		unsigned int *m = (unsigned int *)(block - align);
>  		slob_free(m, *m + align);
>  	} else {
> @@ -584,7 +584,7 @@ size_t __ksize(const void *block)
>  	if (unlikely(!folio_test_slab(folio)))
>  		return folio_size(folio);
>  
> -	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
> +	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
>  	m = (unsigned int *)(block - align);
>  	return SLOB_UNITS(*m) * SLOB_UNIT;
>  }
> -- 
> 2.36.0.rc2.479.g8af0fa9b8e-goog
>

Looks good to me.
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>

And it works properly on my arm64 machine (no MTE support).
Tested-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>

Thanks!

-- 
Thanks,
Hyeonggon