On Mon, 30 Mar 2020 20:14:50 -0700 Dongli Zhang <dongli.zhang@xxxxxxxxxx> wrote:

> slub_debug is able to fix a corrupted slab freelist/page. However,
> alloc_debug_processing() only checks the validity of the current and next
> freepointer during the allocation path. As a result, once some objects have
> their freepointers corrupted, deactivate_slab() may lead to a page fault.
>
> Below is from a test kernel module when
> 'slub_debug=PUF,kmalloc-128 slub_nomerge'. The test kernel corrupts the
> freepointer of one free object on purpose. Unfortunately, deactivate_slab()
> does not detect it when iterating the freechain.
>
> ...
>
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2082,6 +2082,20 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
>                  void *prior;
>                  unsigned long counters;
>
> +                if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
> +                    !check_valid_pointer(s, page, nextfree)) {
> +                        /*
> +                         * If 'nextfree' is invalid, it is possible that
> +                         * the object at 'freelist' is already corrupted.
> +                         * Therefore, all objects starting at 'freelist'
> +                         * are isolated.
> +                         */
> +                        object_err(s, page, freelist, "Freechain corrupt");
> +                        freelist = NULL;
> +                        slab_fix(s, "Isolate corrupted freechain");
> +                        break;
> +                }
> +
>                  do {
>                          prior = page->freelist;
>                          counters = page->counters;

We could do it this way:

--- a/mm/slub.c~mm-slub-fix-corrupted-freechain-in-deactivate_slab-fix
+++ a/mm/slub.c
@@ -2083,6 +2083,7 @@ static void deactivate_slab(struct kmem_
                 void *prior;
                 unsigned long counters;
 
+#ifdef CONFIG_SLUB_DEBUG
                 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
                     !check_valid_pointer(s, page, nextfree)) {
                         /*
@@ -2096,6 +2097,7 @@ static void deactivate_slab(struct kmem_
                         slab_fix(s, "Isolate corrupted freechain");
                         break;
                 }
+#endif
 
                 do {
                         prior = page->freelist;

But it's a bit ugly.  How about this?

--- a/mm/slub.c~mm-slub-fix-corrupted-freechain-in-deactivate_slab-fix
+++ a/mm/slub.c
@@ -650,6 +650,20 @@ static void slab_bug(struct kmem_cache *
         va_end(args);
 }
 
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+                               void *freelist, void *nextfree)
+{
+        if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
+            !check_valid_pointer(s, page, nextfree)) {
+                object_err(s, page, freelist, "Freechain corrupt");
+                freelist = NULL;
+                slab_fix(s, "Isolate corrupted freechain");
+                return true;
+        }
+
+        return false;
+}
+
 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 {
         struct va_format vaf;
@@ -1400,6 +1414,11 @@ static inline void inc_slabs_node(struct
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                                         int objects) {}
 
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+                               void *freelist, void *nextfree)
+{
+        return false;
+}
 #endif /* CONFIG_SLUB_DEBUG */
 
 /*
@@ -2083,19 +2102,13 @@ static void deactivate_slab(struct kmem_
                 void *prior;
                 unsigned long counters;
 
-                if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
-                    !check_valid_pointer(s, page, nextfree)) {
-                        /*
-                         * If 'nextfree' is invalid, it is possible that
-                         * the object at 'freelist' is already corrupted.
-                         * Therefore, all objects starting at 'freelist'
-                         * are isolated.
-                         */
-                        object_err(s, page, freelist, "Freechain corrupt");
-                        freelist = NULL;
-                        slab_fix(s, "Isolate corrupted freechain");
+                /*
+                 * If 'nextfree' is invalid, it is possible that the object at
+                 * 'freelist' is already corrupted.  So isolate all objects
+                 * starting at 'freelist'.
+                 */
+                if (freelist_corrupted(s, page, freelist, nextfree))
                         break;
-                }
 
                 do {
                         prior = page->freelist;
_
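
For reference, here is a minimal userspace sketch of the idea behind the
check: before following a free pointer, verify that it lies inside the slab
and on an object boundary, and isolate the rest of the chain if it does not.
This is only an illustration under simplified assumptions (a flat byte array
standing in for the slab page, a made-up valid_pointer() helper, no red
zones or freepointer offsets), not the kernel implementation of
check_valid_pointer()/freelist_corrupted():

#include <stdio.h>
#include <stdint.h>

#define OBJ_SIZE   128          /* object size, as in kmalloc-128 */
#define NR_OBJECTS 8

/* Fake "slab page"; each free object stores the next free pointer at offset 0. */
static unsigned char slab[NR_OBJECTS * OBJ_SIZE];

/* Simplified stand-in for check_valid_pointer(): in bounds and object aligned. */
static int valid_pointer(const void *p)
{
        uintptr_t base = (uintptr_t)slab;
        uintptr_t addr = (uintptr_t)p;

        if (!p)
                return 1;       /* NULL legitimately terminates the freelist */
        return addr >= base && addr < base + sizeof(slab) &&
               (addr - base) % OBJ_SIZE == 0;
}

int main(void)
{
        void *freelist = &slab[0];
        void **obj;
        int i;

        /* Chain object 0 -> 1 -> ... -> 7 -> NULL. */
        for (i = 0; i < NR_OBJECTS; i++) {
                obj = (void **)&slab[i * OBJ_SIZE];
                *obj = (i + 1 < NR_OBJECTS) ?
                        (void *)&slab[(i + 1) * OBJ_SIZE] : NULL;
        }

        /* Corrupt the freepointer of one free object on purpose. */
        obj = (void **)&slab[3 * OBJ_SIZE];
        *obj = (void *)(uintptr_t)0xdeadbeef;

        /* Walk the chain, validating 'nextfree' before following it. */
        while (freelist) {
                void *nextfree = *(void **)freelist;

                if (!valid_pointer(nextfree)) {
                        printf("freechain corrupt at object %ld, isolating the rest\n",
                               (long)(((unsigned char *)freelist - slab) / OBJ_SIZE));
                        freelist = NULL;        /* isolate corrupted freechain */
                        break;
                }
                printf("object %ld is free\n",
                       (long)(((unsigned char *)freelist - slab) / OBJ_SIZE));
                freelist = nextfree;
        }
        return 0;
}

Walking the chain reports objects 0-2 as free and then stops at object 3,
which is the same behaviour the patch gives deactivate_slab(): the walk is
cut short instead of chasing a bogus pointer and faulting.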