On Fri, 15 Apr 2016 10:25:59 -0700 Thomas Garnier <thgarnie@xxxxxxxxxx> wrote: > Provide an optional config (CONFIG_FREELIST_RANDOM) to randomize the > SLAB freelist. The list is randomized during initialization of a new set > of pages. The order on different freelist sizes is pre-computed at boot > for performance. This security feature reduces the predictability of the > kernel SLAB allocator against heap overflows rendering attacks much less > stable. > > For example this attack against SLUB (also applicable against SLAB) > would be affected: > https://jon.oberheide.org/blog/2010/09/10/linux-kernel-can-slub-overflow/ > > Also, since v4.6 the freelist was moved at the end of the SLAB. It means > a controllable heap is opened to new attacks not yet publicly discussed. > A kernel heap overflow can be transformed to multiple use-after-free. > This feature makes this type of attack harder too. > > The config option name is not specific to the SLAB as this approach will > be extended to other allocators like SLUB. > > Performance results highlighted no major changes: > > ... > > --- a/mm/slab.c > +++ b/mm/slab.c > @@ -1229,6 +1229,61 @@ static void __init set_up_node(struct kmem_cache *cachep, int index) > } > } > > +#ifdef CONFIG_FREELIST_RANDOM > +/* > + * Master lists are pre-computed random lists > + * Lists of different sizes are used to optimize performance on different > + * SLAB object sizes per pages. "object sizes per pages" doesn't make sense. "object-per-page counts"? "object sizes"? > + */ > +static freelist_idx_t master_list_2[2]; > +static freelist_idx_t master_list_4[4]; > +static freelist_idx_t master_list_8[8]; > +static freelist_idx_t master_list_16[16]; > +static freelist_idx_t master_list_32[32]; > +static freelist_idx_t master_list_64[64]; > +static freelist_idx_t master_list_128[128]; > +static freelist_idx_t master_list_256[256]; > +static struct m_list { > + size_t count; > + freelist_idx_t *list; > +} master_lists[] = { > + { ARRAY_SIZE(master_list_2), master_list_2 }, > + { ARRAY_SIZE(master_list_4), master_list_4 }, > + { ARRAY_SIZE(master_list_8), master_list_8 }, > + { ARRAY_SIZE(master_list_16), master_list_16 }, > + { ARRAY_SIZE(master_list_32), master_list_32 }, > + { ARRAY_SIZE(master_list_64), master_list_64 }, > + { ARRAY_SIZE(master_list_128), master_list_128 }, > + { ARRAY_SIZE(master_list_256), master_list_256 }, > +}; > + > +static void __init freelist_random_init(void) > +{ > + unsigned int seed; > + size_t z, i, rand; > + struct rnd_state slab_rand; > + > + get_random_bytes_arch(&seed, sizeof(seed)); Using get_random_bytes_arch() seems a rather poor decision. There are the caveats described at the get_random_bytes_arch() definition site, and the minor issue that get_random_bytes_arch() only actually works on x86 and powerpc! This is run-once __init code, so rather than adding the kernel's very first get_random_bytes_arch() call site(!), why not stick with good old get_random_bytes()? If there's something I'm missing, please at least place a very good comment here explaining the reasoning. > + prandom_seed_state(&slab_rand, seed); > + > + for (z = 0; z < ARRAY_SIZE(master_lists); z++) { > + for (i = 0; i < master_lists[z].count; i++) > + master_lists[z].list[i] = i; > + > + /* Fisher-Yates shuffle */ > + for (i = master_lists[z].count - 1; i > 0; i--) { > + rand = prandom_u32_state(&slab_rand); > + rand %= (i + 1); > + swap(master_lists[z].list[i], > + master_lists[z].list[rand]); > + } > + } > +} > +#else > +static inline void __init freelist_random_init(void) { } > +#endif /* CONFIG_FREELIST_RANDOM */ > + > + > /* > * Initialisation. Called after the page allocator have been initialised and > * before smp_init(). > > ... > > @@ -2442,6 +2499,101 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) > #endif > } > > +#ifdef CONFIG_FREELIST_RANDOM > +enum master_type { > + match, > + less, > + more > +}; > + > +struct random_mng { I can't work out what "mng" means in this code. > + unsigned int padding; > + unsigned int pos; > + unsigned int count; > + struct m_list master_list; > + unsigned int master_count; > + enum master_type type; > +}; It would be useful to document the above struct. Skilfully documenting the data structures is key to making the code understandable. > +static void random_mng_initialize(struct random_mng *mng, unsigned int count) > +{ > + unsigned int idx; > + const unsigned int last_idx = ARRAY_SIZE(master_lists) - 1; > + > + memset(mng, 0, sizeof(*mng)); > + mng->count = count; > + mng->pos = 0; > + /* count is >= 2 */ > + idx = ilog2(count) - 1; slab.c should now include log2.h. > + if (idx >= last_idx) > + idx = last_idx; > + else if (roundup_pow_of_two(idx + 1) != count) > + idx++; > + mng->master_list = master_lists[idx]; > + if (mng->master_list.count == mng->count) > + mng->type = match; > + else if (mng->master_list.count > mng->count) > + mng->type = more; > + else > + mng->type = less; > +} > + > +static freelist_idx_t get_next_entry(struct random_mng *mng) > +{ > + if (mng->type == less && mng->pos == mng->master_list.count) { > + mng->padding += mng->pos; > + mng->pos = 0; > + } > + BUG_ON(mng->pos >= mng->master_list.count); > + return mng->master_list.list[mng->pos++]; > +} > + > +static freelist_idx_t next_random_slot(struct random_mng *mng) > +{ > + freelist_idx_t cur, entry; > + > + entry = get_next_entry(mng); > + > + if (mng->type != match) { > + while ((entry + mng->padding) >= mng->count) > + entry = get_next_entry(mng); > + cur = entry + mng->padding; > + BUG_ON(cur >= mng->count); > + } else { > + cur = entry; > + } > + > + return cur; > +} > + > +static void shuffle_freelist(struct kmem_cache *cachep, struct page *page, > + unsigned int count) > +{ > + unsigned int i; > + struct random_mng mng; > + > + if (count < 2) { > + for (i = 0; i < count; i++) > + set_free_obj(page, i, i); > + return; > + } > + > + /* Last chunk is used already in this case */ > + if (OBJFREELIST_SLAB(cachep)) > + count--; > + > + random_mng_initialize(&mng, count); > + for (i = 0; i < count; i++) > + set_free_obj(page, i, next_random_slot(&mng)); > + > + if (OBJFREELIST_SLAB(cachep)) > + set_free_obj(page, i, i); > +} Sorry, but the code is really too light on comments. Each of the above functions would benefit from a description of what they do and, most importantly, why they do it. Please address these things and let's wait for the slab maintainers (and perhaps Kees?) to weigh in. > +#else > +static inline void shuffle_freelist(struct kmem_cache *cachep, > + struct page *page, unsigned int count) { } > +#endif /* CONFIG_FREELIST_RANDOM */ > + > static void cache_init_objs(struct kmem_cache *cachep, > struct page *page) > { > @@ -2464,8 +2616,12 @@ static void cache_init_objs(struct kmem_cache *cachep, > kasan_poison_object_data(cachep, objp); > } > > - set_free_obj(page, i, i); > + /* If enabled, initialization is done in shuffle_freelist */ > + if (!config_enabled(CONFIG_FREELIST_RANDOM)) > + set_free_obj(page, i, i); > } > + > + shuffle_freelist(cachep, page, cachep->num); > } > > static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>