Hi,

I'm submitting this patch for the slab allocator. The patch adds a new flag,
SLAB_MINIMIZE_WASTE. When this flag is set, the slab subsystem uses
higher-order allocations to minimize wasted space (it will not use
higher-order allocations when there is no benefit, for example when the
object size is a power of two).

The reason we need this is that we are going to merge code that does block
device deduplication (it was developed separately and sold as a commercial
product), and that code uses block sizes that are not a power of two (block
sizes of 192K, 448K, 640K and 832K are used in the wild). The slab allocator
rounds such allocations up to the nearest power of two, which wastes a lot of
memory - for example, a 640K buffer ends up consuming a 1M slab, wasting
37.5% of the space. Performance of the solution depends on efficient memory
usage, so we should minimize waste as much as possible.

Higher-order allocations are unreliable (they may fail at any time if memory
is fragmented), but that is not an issue here - the code preallocates a few
buffers with vmalloc and then allocates buffers from the slab cache. If a
slab allocation fails due to memory fragmentation, we throw away and reuse an
existing buffer, so there is no loss of functionality.

Mikulas
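To show how the flag is intended to be used, here is a simplified sketch
(illustration only, not part of the patch - the cache and function names are
made up, and the real deduplication code keeps a small pool of vmalloc'ed
reserve buffers rather than the single one shown here):

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DEDUP_BLOCK_SIZE	(640 * 1024)	/* a non-power-of-two block size */

static struct kmem_cache *dedup_buf_cache;
static void *dedup_reserve_buf;		/* preallocated fallback buffer */

static int dedup_init(void)
{
	dedup_buf_cache = kmem_cache_create("dedup_bufs", DEDUP_BLOCK_SIZE, 0,
					    SLAB_MINIMIZE_WASTE, NULL);
	if (!dedup_buf_cache)
		return -ENOMEM;

	/* vmalloc'ed reserve, used when the slab allocation fails */
	dedup_reserve_buf = vmalloc(DEDUP_BLOCK_SIZE);
	if (!dedup_reserve_buf) {
		kmem_cache_destroy(dedup_buf_cache);
		return -ENOMEM;
	}
	return 0;
}

static void *dedup_get_buf(void)
{
	/*
	 * The higher-order allocation may fail when memory is fragmented;
	 * that is not fatal - fall back to the preallocated reserve
	 * (the real code tracks whether the reserve is already in use).
	 */
	void *buf = kmem_cache_alloc(dedup_buf_cache, GFP_NOIO | __GFP_NOWARN);
	if (!buf)
		buf = dedup_reserve_buf;
	return buf;
}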
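The effect on the chosen slab order can be replayed by hand. The following
stand-alone user-space sketch mirrors the order-selection loop added to
calculate_order() in the mm/slub.c hunk below; the page size, MAX_ORDER and
the starting order of 8 are assumptions for a typical x86-64 configuration,
not measurements. For a 640K object it settles on order 9 - a 2M slab holding
3 objects (about 6% waste) instead of a 1M slab holding a single object
(37.5% waste):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define MAX_ORDER	11

int main(void)
{
	unsigned long size = 640 * 1024;	/* 640K objects, one of the dedup block sizes */
	unsigned long reserved = 0;
	int order = 8;				/* assumed starting order: 1M slab, 1 object */
	int test_order;

	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
		unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
		unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;

		if (test_order_objects >= 64)	/* stands in for min(64, MAX_OBJS_PER_PAGE) */
			break;
		/* only move up if the bigger slab packs objects more densely */
		if (test_order_objects > order_objects << (test_order - order))
			order = test_order;
	}

	printf("chosen order %d: %lu objects per %lu KiB slab\n",
	       order, (PAGE_SIZE << order) / size, (PAGE_SIZE << order) / 1024);
	return 0;
}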
From: Mikulas Patocka <mpatocka@xxxxxxxxxx>

This patch introduces a flag SLAB_MINIMIZE_WASTE for slab and slub. This
flag causes allocation of larger slabs in order to minimize wasted space.

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 include/linux/slab.h |    7 +++++++
 mm/slab.c            |    4 ++--
 mm/slab.h            |    7 ++++---
 mm/slab_common.c     |    2 +-
 mm/slub.c            |   25 ++++++++++++++++++++-----
 6 files changed, 35 insertions(+), 12 deletions(-)

Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h	2018-03-20 14:59:20.528030000 +0100
+++ linux-2.6/include/linux/slab.h	2018-03-20 14:59:20.518030000 +0100
@@ -108,6 +108,13 @@
 #define SLAB_KASAN	0
 #endif
 
+/*
+ * Use higher-order allocations to minimize wasted space.
+ * Note: the allocation is unreliable if this flag is used, the caller
+ * must handle allocation failures gracefully.
+ */
+#define SLAB_MINIMIZE_WASTE	((slab_flags_t __force)0x10000000U)
+
 /* The following flags affect the page allocator grouping pages by mobility */
 /* Objects are reclaimable */
 #define SLAB_RECLAIM_ACCOUNT	((slab_flags_t __force)0x00020000U)
Index: linux-2.6/mm/slab_common.c
===================================================================
--- linux-2.6.orig/mm/slab_common.c	2018-03-20 14:59:20.528030000 +0100
+++ linux-2.6/mm/slab_common.c	2018-03-20 14:59:20.518030000 +0100
@@ -52,7 +52,7 @@ static DECLARE_WORK(slab_caches_to_rcu_d
 		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
-			 SLAB_ACCOUNT)
+			 SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2018-03-20 14:59:20.528030000 +0100
+++ linux-2.6/mm/slub.c	2018-03-20 14:59:20.518030000 +0100
@@ -3234,7 +3234,7 @@ static inline int slab_order(int size, i
 	return order;
 }
 
-static inline int calculate_order(int size, int reserved)
+static inline int calculate_order(int size, int reserved, slab_flags_t flags)
 {
 	int order;
 	int min_objects;
@@ -3261,7 +3261,7 @@ static inline int calculate_order(int si
 			order = slab_order(size, min_objects,
 					slub_max_order, fraction, reserved);
 			if (order <= slub_max_order)
-				return order;
+				goto ret_order;
 			fraction /= 2;
 		}
 		min_objects--;
@@ -3273,15 +3273,30 @@ static inline int calculate_order(int si
 	 */
 	order = slab_order(size, 1, slub_max_order, 1, reserved);
 	if (order <= slub_max_order)
-		return order;
+		goto ret_order;
 
 	/*
 	 * Doh this slab cannot be placed using slub_max_order.
 	 */
 	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
 	if (order < MAX_ORDER)
-		return order;
+		goto ret_order;
 	return -ENOSYS;
+
+ret_order:
+	if (flags & SLAB_MINIMIZE_WASTE) {
+		/* Increase the order if it decreases waste */
+		int test_order;
+		for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
+			unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
+			unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
+			if (test_order_objects >= min(64, MAX_OBJS_PER_PAGE))
+				break;
+			if (test_order_objects > order_objects << (test_order - order))
+				order = test_order;
+		}
+	}
+	return order;
 }
 
 static void
@@ -3546,7 +3561,7 @@ static int calculate_sizes(struct kmem_c
 	if (forced_order >= 0)
 		order = forced_order;
 	else
-		order = calculate_order(size, s->reserved);
+		order = calculate_order(size, s->reserved, flags);
 
 	if (order < 0)
 		return 0;
Index: linux-2.6/mm/slab.h
===================================================================
--- linux-2.6.orig/mm/slab.h	2018-03-20 14:59:20.528030000 +0100
+++ linux-2.6/mm/slab.h	2018-03-20 14:59:20.518030000 +0100
@@ -142,10 +142,10 @@ static inline slab_flags_t kmem_cache_fl
 #if defined(CONFIG_SLAB)
 #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
 			  SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
-			  SLAB_ACCOUNT)
+			  SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 #elif defined(CONFIG_SLUB)
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
-			  SLAB_TEMPORARY | SLAB_ACCOUNT)
+			  SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 #else
 #define SLAB_CACHE_FLAGS (0)
 #endif
@@ -164,7 +164,8 @@ static inline slab_flags_t kmem_cache_fl
 			      SLAB_NOLEAKTRACE | \
 			      SLAB_RECLAIM_ACCOUNT | \
 			      SLAB_TEMPORARY | \
-			      SLAB_ACCOUNT)
+			      SLAB_ACCOUNT | \
+			      SLAB_MINIMIZE_WASTE)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
 void __kmem_cache_release(struct kmem_cache *);
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c	2018-03-20 14:59:20.528030000 +0100
+++ linux-2.6/mm/slab.c	2018-03-20 14:59:20.518030000 +0100
@@ -1789,14 +1789,14 @@ static size_t calculate_slab_order(struc
 		 * as GFP_NOFS and we really don't want to have to be allocating
 		 * higher-order pages when we are unable to shrink dcache.
 		 */
-		if (flags & SLAB_RECLAIM_ACCOUNT)
+		if (flags & SLAB_RECLAIM_ACCOUNT && !(flags & SLAB_MINIMIZE_WASTE))
 			break;
 
 		/*
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
		 */
-		if (gfporder >= slab_max_order)
+		if (gfporder >= slab_max_order && !(flags & SLAB_MINIMIZE_WASTE))
 			break;
 
 		/*

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel