On Fri, 11 Sep 2020 at 09:17, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
>
> On Mon, Sep 7, 2020 at 3:41 PM Marco Elver <elver@xxxxxxxxxx> wrote:
> >
> > From: Alexander Potapenko <glider@xxxxxxxxxx>
> >
> > Inserts KFENCE hooks into the SLAB allocator.
> >
> > We note the addition of the 'orig_size' argument to slab_alloc*()
> > functions, to be able to pass the originally requested size to KFENCE.
> > When KFENCE is disabled, there is no additional overhead, since these
> > functions are __always_inline.
> >
> > Co-developed-by: Marco Elver <elver@xxxxxxxxxx>
> > Signed-off-by: Marco Elver <elver@xxxxxxxxxx>
> > Signed-off-by: Alexander Potapenko <glider@xxxxxxxxxx>
> > ---
> >  mm/slab.c        | 46 ++++++++++++++++++++++++++++++++++------------
> >  mm/slab_common.c |  6 +++++-
> >  2 files changed, 39 insertions(+), 13 deletions(-)
> >
> > diff --git a/mm/slab.c b/mm/slab.c
> > index 3160dff6fd76..30aba06ae02b 100644
> > --- a/mm/slab.c
> > +++ b/mm/slab.c
> > @@ -100,6 +100,7 @@
> >  #include <linux/seq_file.h>
> >  #include <linux/notifier.h>
> >  #include <linux/kallsyms.h>
> > +#include <linux/kfence.h>
> >  #include <linux/cpu.h>
> >  #include <linux/sysctl.h>
> >  #include <linux/module.h>
> > @@ -3206,7 +3207,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
> >  }
> >
> >  static __always_inline void *
> > -slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
> > +slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size,
> >                 unsigned long caller)
> >  {
> >         unsigned long save_flags;
> > @@ -3219,6 +3220,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
> >         if (unlikely(!cachep))
> >                 return NULL;
> >
> > +       ptr = kfence_alloc(cachep, orig_size, flags);
> > +       if (unlikely(ptr))
> > +               goto out_hooks;
> > +
> >         cache_alloc_debugcheck_before(cachep, flags);
> >         local_irq_save(save_flags);
> >
> > @@ -3251,6 +3256,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
> >         if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr)
> >                 memset(ptr, 0, cachep->object_size);
> >
> > +out_hooks:
> >         slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr);
> >         return ptr;
> >  }
> > @@ -3288,7 +3294,7 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
> >  #endif /* CONFIG_NUMA */
> >
> >  static __always_inline void *
> > -slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
> > +slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned long caller)
> >  {
> >         unsigned long save_flags;
> >         void *objp;
> > @@ -3299,6 +3305,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
> >         if (unlikely(!cachep))
> >                 return NULL;
> >
> > +       objp = kfence_alloc(cachep, orig_size, flags);
> > +       if (unlikely(objp))
> > +               goto leave;
> > +
> >         cache_alloc_debugcheck_before(cachep, flags);
> >         local_irq_save(save_flags);
> >         objp = __do_cache_alloc(cachep, flags);
> > @@ -3309,6 +3319,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
> >         if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp)
> >                 memset(objp, 0, cachep->object_size);
> >
> > +leave:
> >         slab_post_alloc_hook(cachep, objcg, flags, 1, &objp);
> >         return objp;
> >  }
> > @@ -3414,6 +3425,11 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
> >  static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
> >                                          unsigned long caller)
> >  {
> > +       if (kfence_free(objp)) {
> > +               kmemleak_free_recursive(objp, cachep->flags);
> > +               return;
> > +       }
> > +
> >         /* Put the object into the quarantine, don't touch it for now. */
> >         if (kasan_slab_free(cachep, objp, _RET_IP_))
> >                 return;
> > @@ -3479,7 +3495,7 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
> >   */
> >  void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
> >  {
> > -       void *ret = slab_alloc(cachep, flags, _RET_IP_);
> > +       void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_);
>
> It's kinda minor, but since we are talking about malloc fast path:
> will passing 0 instead of cachep->object_size (here and everywhere
> else) and then using cachep->object_size on the slow path if 0 is
> passed as size improve codegen?

It doesn't save us much, maybe 1 instruction based on what I'm looking
at right now.

The main worry I have is that the 'orig_size' argument is now part of
slab_alloc, and changing its semantics may cause problems in future if
it's no longer just passed to kfence_alloc(). Today, we can do the
'size = size ?: cache->object_size' trick inside kfence_alloc(), but at
the cost of breaking the intuitive semantics of slab_alloc's orig_size
argument for future users. Is it worth it?

Thanks,
-- Marco
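
[For concreteness, here is a minimal user-space sketch of the trade-off
being weighed above: call sites pass 0 as orig_size on the fast path, and
the callee resolves 0 to the cache's object size with the GNU C 'x ?: y'
shorthand Marco mentions. The names demo_cache, demo_kfence_alloc and
demo_slab_alloc are invented stand-ins, not the kernel's
kmem_cache/kfence_alloc/slab_alloc; the snippet compiles with gcc or
clang.]

/* Sketch only; invented names, not kernel code. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_cache {
	size_t object_size;	/* stand-in for kmem_cache::object_size */
};

/* Slow path: resolve orig_size == 0 to the cache's object size.
 * 'a ?: b' is the GNU C shorthand for 'a ? a : b'. */
static void *demo_kfence_alloc(struct demo_cache *c, size_t orig_size)
{
	size_t size = orig_size ?: c->object_size;

	printf("allocating %zu bytes\n", size);
	return malloc(size);
}

/* Fast path: under Dmitry's suggestion, call sites pass the constant 0
 * instead of loading c->object_size, which may save an instruction or
 * so per inlined call site. */
static inline void *demo_slab_alloc(struct demo_cache *c, size_t orig_size)
{
	return demo_kfence_alloc(c, orig_size);
}

int main(void)
{
	struct demo_cache cache = { .object_size = 128 };

	/* 0 now means "use the cache's object_size"... */
	void *p = demo_slab_alloc(&cache, 0);

	/* ...which is Marco's objection: orig_size == 0 no longer means
	 * a zero-byte request, so future users of the argument could be
	 * surprised by the overloaded meaning. */
	free(p);
	return 0;
}

[The codegen saving comes from the literal 0 at each inlined call site;
the cost is that orig_size's meaning is no longer "the size the caller
originally requested".]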