Dedicated caches are available for fixed size allocations via
kmem_cache_alloc(), but for dynamically sized allocations there is only
the global kmalloc API's set of buckets available. This means it isn't
possible to separate specific sets of dynamically sized allocations into
a separate collection of caches.

This leads to a use-after-free exploitation weakness in the Linux
kernel since many heap memory spraying/grooming attacks depend on using
userspace-controllable dynamically sized allocations to collide with
fixed size allocations that end up in the same cache.

While CONFIG_RANDOM_KMALLOC_CACHES provides a probabilistic defense
against these kinds of "type confusion" attacks, including for fixed
same-size heap objects, we can create a complementary deterministic
defense for dynamically sized allocations.

In order to isolate user-controllable sized allocations from system
allocations, introduce kmem_buckets_create() and kmem_buckets_alloc(),
which behave like kmem_cache_create() and kmem_cache_alloc()
respectively, but confine allocations to a dedicated set of sized
caches (which have the same layout as the kmalloc caches).

This can also be used in the future, once codetag allocation
annotations exist, to implement per-caller allocation cache
isolation[1] even for dynamic allocations.

Link: https://lore.kernel.org/lkml/202402211449.401382D2AF@keescook [1]
Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
---
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Pekka Enberg <penberg@xxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Roman Gushchin <roman.gushchin@xxxxxxxxx>
Cc: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>
Cc: linux-mm@xxxxxxxxx
---
 include/linux/slab.h | 26 ++++++++++++++++++
 mm/slab_common.c     | 64 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index b5f5ee8308d0..4a4ff84534be 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -492,6 +492,16 @@ void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 			   gfp_t gfpflags) __assume_slab_alignment __malloc;
 void kmem_cache_free(struct kmem_cache *s, void *objp);
 
+struct kmem_buckets {
+	struct kmem_cache *caches[ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL])];
+};
+
+struct kmem_buckets *
+kmem_buckets_create(const char *name, unsigned int align, slab_flags_t flags,
+		    unsigned int useroffset, unsigned int usersize,
+		    void (*ctor)(void *));
+
+
 /*
  * Bulk allocation and freeing operations. These are accelerated in an
  * allocator specific way to avoid taking locks repeatedly or building
@@ -594,6 +604,22 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 	return __kmalloc(size, flags);
 }
 
+static __always_inline __alloc_size(2)
+void *kmem_buckets_alloc(struct kmem_buckets *b, size_t size, gfp_t flags)
+{
+	unsigned int index;
+
+	if (size > KMALLOC_MAX_CACHE_SIZE)
+		return kmalloc_large(size, flags);
+	if (WARN_ON_ONCE(!b))
+		return NULL;
+	index = kmalloc_index(size);
+	if (WARN_ONCE(!b->caches[index],
+		      "missing cache for size %zu (index %d)\n", size, index))
+		return kmalloc(size, flags);
+	return kmalloc_trace(b->caches[index], flags, size);
+}
+
 static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) && size) {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 238293b1dbe1..6002a182d014 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -392,6 +392,66 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
+static struct kmem_cache *kmem_buckets_cache __ro_after_init;
+
+struct kmem_buckets *
+kmem_buckets_create(const char *name, unsigned int align,
+		    slab_flags_t flags,
+		    unsigned int useroffset, unsigned int usersize,
+		    void (*ctor)(void *))
+{
+	struct kmem_buckets *b;
+	int idx;
+
+	if (WARN_ON(!kmem_buckets_cache))
+		return NULL;
+
+	b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
+	if (WARN_ON(!b))
+		return NULL;
+
+	for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+		char *short_size, *cache_name;
+		unsigned int size;
+
+		if (!kmalloc_caches[KMALLOC_NORMAL][idx])
+			continue;
+
+		size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
+		if (!size)
+			continue;
+
+		short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
+		if (WARN_ON(!short_size))
+			goto fail;
+
+		cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
+		if (WARN_ON(!cache_name))
+			goto fail;
+
+		b->caches[idx] = kmem_cache_create_usercopy(cache_name, size,
+					align, flags, useroffset,
+					min(size - useroffset, usersize), ctor);
+		kfree(cache_name);
+		if (WARN_ON(!b->caches[idx]))
+			goto fail;
+	}
+
+	return b;
+
+fail:
+	for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+		if (b->caches[idx]) {
+			kfree(b->caches[idx]->name);
+			kmem_cache_destroy(b->caches[idx]);
+		}
+	}
+	kfree(b);
+
+	return NULL;
+}
+EXPORT_SYMBOL(kmem_buckets_create);
+
 #ifdef SLAB_SUPPORTS_SYSFS
 /*
  * For a given kmem_cache, kmem_cache_destroy() should only be called
@@ -934,6 +994,10 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 
 	/* Kmalloc array is now usable */
 	slab_state = UP;
+
+	kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
+					       sizeof(struct kmem_buckets) * ARRAY_SIZE(kmalloc_info),
+					       0, 0, NULL);
 }
 
 /**
-- 
2.34.1
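For reviewers, a minimal usage sketch of the proposed API follows. It is
not part of the patch: the msg_buckets variable and the
msg_buckets_init()/msg_alloc() helpers are hypothetical names, and the
SLAB_ACCOUNT flag and usercopy arguments are only illustrative choices.

#include <linux/init.h>
#include <linux/limits.h>
#include <linux/slab.h>

/* Hypothetical bucket set for a userspace-controlled, variably sized object. */
static struct kmem_buckets *msg_buckets __ro_after_init;

static int __init msg_buckets_init(void)
{
	/*
	 * Takes the same arguments as kmem_cache_create_usercopy() minus the
	 * object size; one cache per kmalloc size class is created behind the
	 * scenes, named after the size classes (e.g. "msg_msg-32").
	 * Whitelisting the whole object for usercopy here is purely for
	 * illustration.
	 */
	msg_buckets = kmem_buckets_create("msg_msg", 0, SLAB_ACCOUNT,
					  0, UINT_MAX, NULL);
	return msg_buckets ? 0 : -ENOMEM;
}
subsys_initcall(msg_buckets_init);

static void *msg_alloc(size_t len, gfp_t gfp)
{
	/*
	 * Lands in the dedicated cache for this size class instead of the
	 * shared kmalloc-* buckets; sizes above KMALLOC_MAX_CACHE_SIZE fall
	 * back to kmalloc_large(), as in kmem_buckets_alloc() above.
	 */
	return kmem_buckets_alloc(msg_buckets, len, gfp);
}

Since the bucket caches share the kmalloc cache layout, freeing such
objects would presumably still go through kfree(); the point of the
sketch is only that attacker-reachable, dynamically sized allocations
no longer share caches with unrelated fixed size objects.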