On Thu, 2021-07-15 at 18:34 +0200, Mike Galbraith wrote:
> Greetings crickets,
>
> Methinks the problem is the hole these patches opened only for RT.
>
> static void put_cpu_partial(struct kmem_cache *s, struct page *page,
>                             int drain)
> {
> #ifdef CONFIG_SLUB_CPU_PARTIAL
>         struct page *oldpage;
>         int pages;
>         int pobjects;
>
>         slub_get_cpu_ptr(s->cpu_slab);
>         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Bah, I'm tired of waiting to see what, if anything, mm folks do about
this little bugger, so I'm gonna step on it my damn self and be done
with it.  Fly or die, little patchlet.

mm/slub: restore/expand unfreeze_partials() local exclusion scope

2180da7ea70a ("mm, slub: use migrate_disable() on PREEMPT_RT") replaced
preempt_disable() in put_cpu_partial() with migrate_disable(), which,
combined with ___slab_alloc() having become preemptible, lets
kmem_cache_free()/kfree() blow through ___slab_alloc() unimpeded, and
vice versa, resulting in PREEMPT_RT-exclusive explosions in both paths
while stress testing with both SLUB_CPU_PARTIAL and MEMCG enabled:
___slab_alloc() during allocation (duh), and __unfreeze_partials()
during free, both while accessing an unmapped page->freelist.

Serialize put_cpu_partial()/unfreeze_partials() on cpu_slab->lock to
ensure that the alloc/free paths cannot pluck cpu_slab->partial out
from underneath each other unconstrained.

Signed-off-by: Mike Galbraith <efault@xxxxxx>
Fixes: 2180da7ea70a ("mm, slub: use migrate_disable() on PREEMPT_RT")
---
 mm/slub.c |   23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2418,6 +2418,17 @@ static void __unfreeze_partials(struct k
 	if (n)
 		spin_unlock_irqrestore(&n->list_lock, flags);

+	/*
+	 * If we got here via __slab_free() -> put_cpu_partial(),
+	 * memcg_free_page_obj_cgroups() -> kfree() may send us
+	 * back to __slab_free() -> put_cpu_partial() for an
+	 * unfortunate second encounter with cpu_slab->lock.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled()) {
+		lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
+		local_unlock(&s->cpu_slab->lock);
+	}
+
 	while (discard_page) {
 		page = discard_page;
 		discard_page = discard_page->next;
@@ -2426,6 +2437,9 @@ static void __unfreeze_partials(struct k
 		discard_slab(s, page);
 		stat(s, FREE_SLAB);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled())
+		local_lock(&s->cpu_slab->lock);
 }

 /*
@@ -2497,7 +2511,9 @@ static void put_cpu_partial(struct kmem_
 				 * partial array is full. Move the existing
 				 * set to the per node partial list.
 				 */
+				local_lock(&s->cpu_slab->lock);
 				unfreeze_partials(s);
+				local_unlock(&s->cpu_slab->lock);
 				oldpage = NULL;
 				pobjects = 0;
 				pages = 0;
@@ -2579,7 +2595,9 @@ static void flush_cpu_slab(struct work_s
 	if (c->page)
 		flush_slab(s, c, true);

+	local_lock(&s->cpu_slab->lock);
 	unfreeze_partials(s);
+	local_unlock(&s->cpu_slab->lock);
 }

 static bool has_cpu_slab(int cpu, struct kmem_cache *s)
@@ -2632,8 +2650,11 @@ static int slub_cpu_dead(unsigned int cp
 	struct kmem_cache *s;

 	mutex_lock(&slab_mutex);
-	list_for_each_entry(s, &slab_caches, list)
+	list_for_each_entry(s, &slab_caches, list) {
+		local_lock(&s->cpu_slab->lock);
 		__flush_cpu_slab(s, cpu);
+		local_unlock(&s->cpu_slab->lock);
+	}
 	mutex_unlock(&slab_mutex);
 	return 0;
 }
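
For readers who want to picture the exclusion being restored here, below
is a minimal userspace sketch -- NOT kernel code -- in which a pthread
mutex stands in for local_lock(&s->cpu_slab->lock) and two threads play
the put_cpu_partial() and unfreeze_partials() roles against a shared
singly-linked list.  All names and structure in the sketch are invented
for illustration.  Build with: cc -pthread sketch.c

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define ITERS 100000

struct page { struct page *next; };

static struct page *partial;	/* models cpu_slab->partial */
static pthread_mutex_t cpu_slab_lock = PTHREAD_MUTEX_INITIALIZER;

/* free path: prepend one "page", as put_cpu_partial() does */
static void *put_path(void *unused)
{
	for (int i = 0; i < ITERS; i++) {
		struct page *page = malloc(sizeof(*page));

		pthread_mutex_lock(&cpu_slab_lock);
		page->next = partial;
		partial = page;
		pthread_mutex_unlock(&cpu_slab_lock);
	}
	return NULL;
}

/* detach the whole list under the lock, discard it outside */
static long drain(void)
{
	struct page *list, *next;
	long n = 0;

	pthread_mutex_lock(&cpu_slab_lock);
	list = partial;
	partial = NULL;
	pthread_mutex_unlock(&cpu_slab_lock);

	for (; list; list = next, n++) {
		next = list->next;
		free(list);
	}
	return n;
}

/* flush path: repeatedly pluck the list, as unfreeze_partials() does */
static void *flush_path(void *unused)
{
	long n = 0;

	for (int i = 0; i < ITERS; i++)
		n += drain();
	return (void *)n;
}

int main(void)
{
	pthread_t a, b;
	void *flushed;

	pthread_create(&a, NULL, put_path, NULL);
	pthread_create(&b, NULL, flush_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, &flushed);

	/* whatever the flusher missed is still a sane list */
	printf("flushed %ld, leftover %ld\n", (long)flushed, drain());
	return 0;
}

Drop the two lock/unlock pairs and the detach can run between the two
stores in the put path, losing or double-freeing pages -- the userspace
analogue of the unmapped page->freelist accesses described above.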