On Fri, Mar 04, 2022 at 01:45:16PM +0100, Vlastimil Babka wrote:
> On 3/4/22 07:34, Hyeonggon Yoo wrote:
> > There is not much benefit for serving large objects in kmalloc().
> > Let's pass large requests to page allocator like SLUB for better
> > maintenance of common code.
> > 
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>
> 
> Looks like it's on the right path! Some suggestions follow.
> 

Thank you for reviewing this! You have a really good eye.

> > ---
> >  include/linux/slab.h | 35 ++++++++++++++++-------------------
> >  mm/slab.c            | 31 +++++++++++++++++++++++++++----
> >  mm/slab.h            | 19 +++++++++++++++++++
> >  mm/slub.c            | 19 -------------------
> >  4 files changed, 62 insertions(+), 42 deletions(-)
> > 
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 37bde99b74af..e7b3330db4f3 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
> >   * Kmalloc array related definitions
> >   */
> >  
> > -#ifdef CONFIG_SLAB
> >  /*
> > - * The largest kmalloc size supported by the SLAB allocators is
> > - * 32 megabyte (2^25) or the maximum allocatable page order if that is
> > - * less than 32 MB.
> > - *
> > - * WARNING: Its not easy to increase this value since the allocators have
> > - * to do various tricks to work around compiler limitations in order to
> > - * ensure proper constant folding.
> > + * SLAB and SLUB directly allocates requests fitting in to an order-1 page
> > + * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
> >   */
> > -#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
> > -				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
> > -#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
> > +#ifdef CONFIG_SLAB
> > +#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> > +#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
> 
> I think we should be able to also remove the larger sizes from
> __kmalloc_index() later in this file.
> 

Right. We can remove sizes larger than max(PAGE_SHIFT) + 1.

Searching with cscope, the maximum PAGE_SHIFT is 20 (1MB), on hexagon,
so I think we can remove anything larger than 2MB. Hmm, I'll add
something like static_assert(PAGE_SHIFT <= 20) and see if any
architecture/config complains about it.

> >  #ifndef KMALLOC_SHIFT_LOW
> >  #define KMALLOC_SHIFT_LOW	5
> >  #endif
> >  #endif
> >  
> >  #ifdef CONFIG_SLUB
> > -/*
> > - * SLUB directly allocates requests fitting in to an order-1 page
> > - * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
> > - */
> >  #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> >  #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
> >  #ifndef KMALLOC_SHIFT_LOW
> > @@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
> >   * Try really hard to succeed the allocation but fail
> >   * eventually.
> >   */
> > +#ifndef CONFIG_SLOB
> >  static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> >  {
> >  	if (__builtin_constant_p(size)) {
> > -#ifndef CONFIG_SLOB
> >  		unsigned int index;
> > -#endif
> > +
> >  		if (size > KMALLOC_MAX_CACHE_SIZE)
> >  			return kmalloc_large(size, flags);
> > -#ifndef CONFIG_SLOB
> > +
> >  		index = kmalloc_index(size);
> >  
> >  		if (!index)
> > @@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> >  		return kmem_cache_alloc_trace(
> >  				kmalloc_caches[kmalloc_type(flags)][index],
> >  				flags, size);
> > -#endif
> >  	}
> >  	return __kmalloc(size, flags);
> >  }
> > +#else
> > +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> > +{
> > +	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> > +		return kmalloc_large(size, flags);
> > +	return __kmalloc(size, flags);
> > +}
> > +#endif
> >  
> >  static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
> >  {
> > diff --git a/mm/slab.c b/mm/slab.c
> > index ddf5737c63d9..570af6dc3478 100644
> > --- a/mm/slab.c
> > +++ b/mm/slab.c
> > @@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
> >  	void *ret;
> >  
> >  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > -		return NULL;
> > +		return kmalloc_large(size, flags);
> This loses the node information and NUMA locality.

Oh, my fault. Thank you for catching this.

> It would be better to generalize kmalloc_large_node() from mm/slub.c instead.

Sounds nice.

> > +
> >  	cachep = kmalloc_slab(size, flags);
> >  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> >  		return cachep;
> > @@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
> >  	void *ret;
> >  
> >  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > -		return NULL;
> > +		return kmalloc_large(size, flags);
> > +
> >  	cachep = kmalloc_slab(size, flags);
> >  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> >  		return cachep;
> > @@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> >  {
> >  	struct kmem_cache *s;
> >  	size_t i;
> > +	struct folio *folio;
> >  
> >  	local_irq_disable();
> >  	for (i = 0; i < size; i++) {
> >  		void *objp = p[i];
> >  
> > -		if (!orig_s) /* called via kfree_bulk */
> > +		if (!orig_s) {
> > +			/* called via kfree_bulk */
> > +			folio = virt_to_folio(objp);
> > +			if (unlikely(!folio_test_slab(folio))) {
> > +				free_large_kmalloc(folio, objp);
> Hmm I'm not sure it's a good idea to call this and the page allocator with
> disabled irq's. Maybe simply re-enable around it?

Seems reasonable. Re-enabling around it will be sufficient for this
function.

By the way, it seems SLAB really does not care about disabling irqs
even when it's unnecessary. I would like to try reducing the
irq-disabled area in SLAB and see how it affects macro/micro benchmarks
after this series is done :)

> > +				continue;
> > +			}
> >  			s = virt_to_cache(objp);
> Since we already have the folio, we should instead use folio_slab and
> slab->slab_cache (see how mm/slub.c does it).
> The same applies to functions below as well.

Right. I'll change them in v3.
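Just to double-check that I understood both points, I think the
kfree_bulk path in kmem_cache_free_bulk() would then look roughly like
this (untested sketch, only to confirm the direction for v3):

	for (i = 0; i < size; i++) {
		void *objp = p[i];

		if (!orig_s) {
			struct folio *folio = virt_to_folio(objp);

			/* called via kfree_bulk */
			if (unlikely(!folio_test_slab(folio))) {
				/*
				 * Don't call the page allocator with irqs
				 * disabled; re-enable just around the free.
				 */
				local_irq_enable();
				free_large_kmalloc(folio, objp);
				local_irq_disable();
				continue;
			}

			/* reuse the folio instead of a second lookup */
			s = folio_slab(folio)->slab_cache;
		} else {
			s = cache_from_obj(orig_s, objp);
		}

		if (!s)
			continue;

		/* debug checks and __cache_free() unchanged */
	}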
> > -		else
> > +		} else
> >  			s = cache_from_obj(orig_s, objp);
> >  		if (!s)
> >  			continue;
> > @@ -3776,11 +3785,20 @@ void kfree(const void *objp)
> >  {
> >  	struct kmem_cache *c;
> >  	unsigned long flags;
> > +	struct folio *folio;
> > +	void *object = (void *) objp;
> >  
> >  	trace_kfree(_RET_IP_, objp);
> >  
> >  	if (unlikely(ZERO_OR_NULL_PTR(objp)))
> >  		return;
> > +
> > +	folio = virt_to_folio(objp);
> > +	if (unlikely(!folio_test_slab(folio))) {
> > +		free_large_kmalloc(folio, object);
> > +		return;
> > +	}
> > +
> >  	local_irq_save(flags);
> >  	kfree_debugcheck(objp);
> >  	c = virt_to_cache(objp);
> > @@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
> >  size_t __ksize(const void *objp)
> >  {
> >  	struct kmem_cache *c;
> > +	struct folio *folio;
> >  	size_t size;
> >  
> >  	BUG_ON(!objp);
> >  	if (unlikely(objp == ZERO_SIZE_PTR))
> >  		return 0;
> >  
> > +	folio = virt_to_folio(objp);
> > +	if (!folio_test_slab(folio))
> > +		return folio_size(folio);
> > +
> >  	c = virt_to_cache(objp);
> >  	size = c ? c->object_size : 0;
> >  
> > diff --git a/mm/slab.h b/mm/slab.h
> > index c7f2abc2b154..31e98beb47a3 100644
> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
> >  	print_tracking(cachep, x);
> >  	return cachep;
> >  }
> > +
> > +static __always_inline void kfree_hook(void *x)
> > +{
> > +	kmemleak_free(x);
> > +	kasan_kfree_large(x);
> Looks like there's only one caller of kfree_hook() so we could do that
> directly there.
> 

Right.

> > +}
> > +
> > +static inline void free_large_kmalloc(struct folio *folio, void *object)
> > +{
> > +	unsigned int order = folio_order(folio);
> > +
> > +	if (WARN_ON_ONCE(order == 0))
> > +		pr_warn_once("object pointer: 0x%p\n", object);
> > +
> > +	kfree_hook(object);
> > +	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > +			      -(PAGE_SIZE << order));
> > +	__free_pages(folio_page(folio, 0), order);
> > +}
> >  #endif /* CONFIG_SLOB */
> >  
> >  static inline size_t slab_ksize(const struct kmem_cache *s)
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 261474092e43..04fd084f4709 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
> >  	return ptr;
> >  }
> >  
> > -static __always_inline void kfree_hook(void *x)
> > -{
> > -	kmemleak_free(x);
> > -	kasan_kfree_large(x);
> > -}
> > -
> >  static __always_inline bool slab_free_hook(struct kmem_cache *s,
> >  					   void *x, bool init)
> >  {
> > @@ -3535,19 +3529,6 @@ struct detached_freelist {
> >  	struct kmem_cache *s;
> >  };
> >  
> > -static inline void free_large_kmalloc(struct folio *folio, void *object)
> > -{
> > -	unsigned int order = folio_order(folio);
> > -
> > -	if (WARN_ON_ONCE(order == 0))
> > -		pr_warn_once("object pointer: 0x%p\n", object);
> > -
> > -	kfree_hook(object);
> > -	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > -			      -(PAGE_SIZE << order));
> > -	__free_pages(folio_page(folio, 0), order);
> > -}
> > -
> >  /*
> >   * This function progressively scans the array with free objects (with
> >   * a limited look ahead) and extract objects belonging to the same
> 

Thank you so much!

--
Thank you, You are awesome!
Hyeonggon :-)
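P.S. For generalizing kmalloc_large_node(), I'm thinking of promoting
what mm/slub.c already does into common code, roughly like this
(completely untested sketch; kmalloc_large_node_hook() would have to
move along with it, and the exact placement may still change in v3):

void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
	struct page *page;
	void *ptr = NULL;
	unsigned int order = get_order(size);

	flags |= __GFP_COMP;
	page = alloc_pages_node(node, flags, order);
	if (page) {
		ptr = page_address(page);
		/* account the pages as unreclaimable slab memory */
		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
				      PAGE_SIZE << order);
	}

	return kmalloc_large_node_hook(ptr, size, flags);
}

Then __do_kmalloc_node() in mm/slab.c could do

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return kmalloc_large_node(size, flags, node);

so the node information is not lost.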