On Fri, Oct 21, 2022 at 09:18:12PM +0200, Jiri Kosina wrote:
> From: Jiri Kosina <jkosina@xxxxxxx>
> 
> The list_lock can be taken in hardirq context when do_drain() is being
> called via IPI on all cores, and therefore lockdep complains about it,
> because it can't be preempted on PREEMPT_RT.
> 
> That's not a real issue, as SLAB can't be built on PREEMPT_RT anyway, but
> we still want to get rid of the warning on non-PREEMPT_RT builds.
> 
> Annotate it therefore as a raw lock in order to get rid of the lockdep
> warning below.
> 
> =============================
> [ BUG: Invalid wait context ]
> 6.1.0-rc1-00134-ge35184f32151 #4 Not tainted
> -----------------------------
> swapper/3/0 is trying to lock:
> ffff8bc88086dc18 (&parent->list_lock){..-.}-{3:3}, at: do_drain+0x57/0xb0
> other info that might help us debug this:
> context-{2:2}
> no locks held by swapper/3/0.
> stack backtrace:
> CPU: 3 PID: 0 Comm: swapper/3 Not tainted 6.1.0-rc1-00134-ge35184f32151 #4
> Hardware name: LENOVO 20K5S22R00/20K5S22R00, BIOS R0IET38W (1.16 ) 05/31/2017
> Call Trace:
>  <IRQ>
>  dump_stack_lvl+0x6b/0x9d
>  __lock_acquire+0x1519/0x1730
>  ? build_sched_domains+0x4bd/0x1590
>  ? __lock_acquire+0xad2/0x1730
>  lock_acquire+0x294/0x340
>  ? do_drain+0x57/0xb0
>  ? sched_clock_tick+0x41/0x60
>  _raw_spin_lock+0x2c/0x40
>  ? do_drain+0x57/0xb0
>  do_drain+0x57/0xb0
>  __flush_smp_call_function_queue+0x138/0x220
>  __sysvec_call_function+0x4f/0x210
>  sysvec_call_function+0x4b/0x90
>  </IRQ>
>  <TASK>
>  asm_sysvec_call_function+0x16/0x20
> RIP: 0010:mwait_idle+0x5e/0x80
> Code: 31 d2 65 48 8b 04 25 80 ed 01 00 48 89 d1 0f 01 c8 48 8b 00 a8 08 75 14 66 90 0f 00 2d 0b 78 46 00 31 c0 48 89 c1 fb 0f 01 c9 <eb> 06 fb 0f 1f 44 00 00 65 48 8b 04 25 80 ed 01 00 f0 80 60 02 df
> RSP: 0000:ffffa90940217ee0 EFLAGS: 00000246
> RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff9bb9f93a
> RBP: 0000000000000003 R08: 0000000000000001 R09: 0000000000000001
> R10: ffffa90940217ea8 R11: 0000000000000000 R12: ffffffffffffffff
> R13: 0000000000000000 R14: ffff8bc88127c500 R15: 0000000000000000
>  ? default_idle_call+0x1a/0xa0
>  default_idle_call+0x4b/0xa0
>  do_idle+0x1f1/0x2c0
>  ? _raw_spin_unlock_irqrestore+0x56/0x70
>  cpu_startup_entry+0x19/0x20
>  start_secondary+0x122/0x150
>  secondary_startup_64_no_verify+0xce/0xdb
>  </TASK>
> 

Looks good to me.
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>

> Signed-off-by: Jiri Kosina <jkosina@xxxxxxx>
> ---
> 
> v1->v2: fix !SLAB build failures due to list_lock mismatch
> v2->v3: really fix it by sending refreshed version of the patch (facepalm)
> 
>  mm/slab.c | 90 +++++++++++++++++++++++++++----------------------------
>  mm/slab.h |  4 +++
>  2 files changed, 49 insertions(+), 45 deletions(-)
> 
> diff --git a/mm/slab.c b/mm/slab.c
> index 59c8e28f7b6a..d8a287900193 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -234,7 +234,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
>  	parent->shared = NULL;
>  	parent->alien = NULL;
>  	parent->colour_next = 0;
> -	spin_lock_init(&parent->list_lock);
> +	raw_spin_lock_init(&parent->list_lock);
>  	parent->free_objects = 0;
>  	parent->free_touched = 0;
>  }
> @@ -559,9 +559,9 @@ static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
>  	slab_node = slab_nid(slab);
>  	n = get_node(cachep, slab_node);
> 
> -	spin_lock(&n->list_lock);
> +	raw_spin_lock(&n->list_lock);
>  	free_block(cachep, &objp, 1, slab_node, &list);
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
> 
>  	slabs_destroy(cachep, &list);
>  }
> @@ -684,7 +684,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
>  	struct kmem_cache_node *n = get_node(cachep, node);
> 
>  	if (ac->avail) {
> -		spin_lock(&n->list_lock);
> +		raw_spin_lock(&n->list_lock);
>  		/*
>  		 * Stuff objects into the remote nodes shared array first.
>  		 * That way we could avoid the overhead of putting the objects
> @@ -695,7 +695,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
> 
>  		free_block(cachep, ac->entry, ac->avail, node, list);
>  		ac->avail = 0;
> -		spin_unlock(&n->list_lock);
> +		raw_spin_unlock(&n->list_lock);
>  	}
>  }
> 
> @@ -768,9 +768,9 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
>  		slabs_destroy(cachep, &list);
>  	} else {
>  		n = get_node(cachep, slab_node);
> -		spin_lock(&n->list_lock);
> +		raw_spin_lock(&n->list_lock);
>  		free_block(cachep, &objp, 1, slab_node, &list);
> -		spin_unlock(&n->list_lock);
> +		raw_spin_unlock(&n->list_lock);
>  		slabs_destroy(cachep, &list);
>  	}
>  	return 1;
> @@ -811,10 +811,10 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
>  	 */
>  	n = get_node(cachep, node);
>  	if (n) {
> -		spin_lock_irq(&n->list_lock);
> +		raw_spin_lock_irq(&n->list_lock);
>  		n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
>  				cachep->num;
> -		spin_unlock_irq(&n->list_lock);
> +		raw_spin_unlock_irq(&n->list_lock);
> 
>  		return 0;
>  	}
> @@ -893,7 +893,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
>  		goto fail;
> 
>  	n = get_node(cachep, node);
> -	spin_lock_irq(&n->list_lock);
> +	raw_spin_lock_irq(&n->list_lock);
>  	if (n->shared && force_change) {
>  		free_block(cachep, n->shared->entry,
>  			   n->shared->avail, node, &list);
> @@ -911,7 +911,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
>  		new_alien = NULL;
>  	}
> 
> -	spin_unlock_irq(&n->list_lock);
> +	raw_spin_unlock_irq(&n->list_lock);
>  	slabs_destroy(cachep, &list);
> 
>  	/*
> @@ -950,7 +950,7 @@ static void cpuup_canceled(long cpu)
>  		if (!n)
>  			continue;
> 
> -		spin_lock_irq(&n->list_lock);
> +		raw_spin_lock_irq(&n->list_lock);
> 
>  		/* Free limit for this kmem_cache_node */
>  		n->free_limit -= cachep->batchcount;
> 
>  		nc->avail = 0;
> 
>  		if (!cpumask_empty(mask)) {
> -			spin_unlock_irq(&n->list_lock);
> +			raw_spin_unlock_irq(&n->list_lock);
>  			goto free_slab;
>  		}
> 
> @@ -975,7 +975,7 @@ static void cpuup_canceled(long cpu)
>  		alien = n->alien;
>  		n->alien = NULL;
> 
> -		spin_unlock_irq(&n->list_lock);
> +		raw_spin_unlock_irq(&n->list_lock);
> 
>  		kfree(shared);
>  		if (alien) {
> @@ -1159,7 +1159,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *
>  	/*
>  	 * Do not assume that spinlocks can be initialized via memcpy:
>  	 */
> -	spin_lock_init(&ptr->list_lock);
> +	raw_spin_lock_init(&ptr->list_lock);
> 
>  	MAKE_ALL_LISTS(cachep, ptr, nodeid);
>  	cachep->node[nodeid] = ptr;
> @@ -1330,11 +1330,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
>  	for_each_kmem_cache_node(cachep, node, n) {
>  		unsigned long total_slabs, free_slabs, free_objs;
> 
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
>  		total_slabs = n->total_slabs;
>  		free_slabs = n->free_slabs;
>  		free_objs = n->free_objects;
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 
>  		pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
>  			node, total_slabs - free_slabs, total_slabs,
> @@ -2096,7 +2096,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
>  {
>  #ifdef CONFIG_SMP
>  	check_irq_off();
> -	assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
> +	assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
>  #endif
>  }
> 
> @@ -2104,7 +2104,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
>  {
>  #ifdef CONFIG_SMP
>  	check_irq_off();
> -	assert_spin_locked(&get_node(cachep, node)->list_lock);
> +	assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
>  #endif
>  }
> 
> @@ -2144,9 +2144,9 @@ static void do_drain(void *arg)
>  	check_irq_off();
>  	ac = cpu_cache_get(cachep);
>  	n = get_node(cachep, node);
> -	spin_lock(&n->list_lock);
> +	raw_spin_lock(&n->list_lock);
>  	free_block(cachep, ac->entry, ac->avail, node, &list);
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
>  	ac->avail = 0;
>  	slabs_destroy(cachep, &list);
>  }
> @@ -2164,9 +2164,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
>  			drain_alien_cache(cachep, n->alien);
> 
>  	for_each_kmem_cache_node(cachep, node, n) {
> -		spin_lock_irq(&n->list_lock);
> +		raw_spin_lock_irq(&n->list_lock);
>  		drain_array_locked(cachep, n->shared, node, true, &list);
> -		spin_unlock_irq(&n->list_lock);
> +		raw_spin_unlock_irq(&n->list_lock);
> 
>  		slabs_destroy(cachep, &list);
>  	}
> @@ -2188,10 +2188,10 @@ static int drain_freelist(struct kmem_cache *cache,
>  	nr_freed = 0;
>  	while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
> 
> -		spin_lock_irq(&n->list_lock);
> +		raw_spin_lock_irq(&n->list_lock);
>  		p = n->slabs_free.prev;
>  		if (p == &n->slabs_free) {
> -			spin_unlock_irq(&n->list_lock);
> +			raw_spin_unlock_irq(&n->list_lock);
>  			goto out;
>  		}
> 
> @@ -2204,7 +2204,7 @@ static int drain_freelist(struct kmem_cache *cache,
>  		 * to the cache.
>  		 */
>  		n->free_objects -= cache->num;
> -		spin_unlock_irq(&n->list_lock);
> +		raw_spin_unlock_irq(&n->list_lock);
>  		slab_destroy(cache, slab);
>  		nr_freed++;
>  	}
> @@ -2629,7 +2629,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
>  	INIT_LIST_HEAD(&slab->slab_list);
>  	n = get_node(cachep, slab_nid(slab));
> 
> -	spin_lock(&n->list_lock);
> +	raw_spin_lock(&n->list_lock);
>  	n->total_slabs++;
>  	if (!slab->active) {
>  		list_add_tail(&slab->slab_list, &n->slabs_free);
> @@ -2639,7 +2639,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
> 
>  	STATS_INC_GROWN(cachep);
>  	n->free_objects += cachep->num - slab->active;
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
> 
>  	fixup_objfreelist_debug(cachep, &list);
>  }
> @@ -2805,7 +2805,7 @@ static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
>  {
>  	struct slab *slab;
> 
> -	assert_spin_locked(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>  	slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
>  					slab_list);
>  	if (!slab) {
> @@ -2832,10 +2832,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
>  	if (!gfp_pfmemalloc_allowed(flags))
>  		return NULL;
> 
> -	spin_lock(&n->list_lock);
> +	raw_spin_lock(&n->list_lock);
>  	slab = get_first_slab(n, true);
>  	if (!slab) {
> -		spin_unlock(&n->list_lock);
> +		raw_spin_unlock(&n->list_lock);
>  		return NULL;
>  	}
> 
> @@ -2844,7 +2844,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
> 
>  	fixup_slab_list(cachep, n, slab, &list);
> 
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
>  	fixup_objfreelist_debug(cachep, &list);
> 
>  	return obj;
> @@ -2903,7 +2903,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
>  	if (!n->free_objects && (!shared || !shared->avail))
>  		goto direct_grow;
> 
> -	spin_lock(&n->list_lock);
> +	raw_spin_lock(&n->list_lock);
>  	shared = READ_ONCE(n->shared);
> 
>  	/* See if we can refill from the shared array */
> @@ -2927,7 +2927,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
>  must_grow:
>  	n->free_objects -= ac->avail;
>  alloc_done:
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
>  	fixup_objfreelist_debug(cachep, &list);
> 
>  direct_grow:
> @@ -3147,7 +3147,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
>  	BUG_ON(!n);
> 
>  	check_irq_off();
> -	spin_lock(&n->list_lock);
> +	raw_spin_lock(&n->list_lock);
>  	slab = get_first_slab(n, false);
>  	if (!slab)
>  		goto must_grow;
> @@ -3165,12 +3165,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
> 
>  	fixup_slab_list(cachep, n, slab, &list);
> 
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
>  	fixup_objfreelist_debug(cachep, &list);
>  	return obj;
> 
>  must_grow:
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
>  	slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
>  	if (slab) {
>  		/* This slab isn't counted yet so don't update free_objects */
> @@ -3325,7 +3325,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
> 
>  	check_irq_off();
>  	n = get_node(cachep, node);
> -	spin_lock(&n->list_lock);
> +	raw_spin_lock(&n->list_lock);
>  	if (n->shared) {
>  		struct array_cache *shared_array = n->shared;
>  		int max = shared_array->limit - shared_array->avail;
> @@ -3354,7 +3354,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
>  		STATS_SET_FREEABLE(cachep, i);
>  	}
>  #endif
> -	spin_unlock(&n->list_lock);
> +	raw_spin_unlock(&n->list_lock);
>  	ac->avail -= batchcount;
>  	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
>  	slabs_destroy(cachep, &list);
> @@ -3721,9 +3721,9 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
> 
>  		node = cpu_to_mem(cpu);
>  		n = get_node(cachep, node);
> -		spin_lock_irq(&n->list_lock);
> +		raw_spin_lock_irq(&n->list_lock);
>  		free_block(cachep, ac->entry, ac->avail, node, &list);
> -		spin_unlock_irq(&n->list_lock);
> +		raw_spin_unlock_irq(&n->list_lock);
>  		slabs_destroy(cachep, &list);
>  	}
>  	free_percpu(prev);
> @@ -3815,9 +3815,9 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
>  		return;
>  	}
> 
> -	spin_lock_irq(&n->list_lock);
> +	raw_spin_lock_irq(&n->list_lock);
>  	drain_array_locked(cachep, ac, node, false, &list);
> -	spin_unlock_irq(&n->list_lock);
> +	raw_spin_unlock_irq(&n->list_lock);
> 
>  	slabs_destroy(cachep, &list);
>  }
> @@ -3901,7 +3901,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
> 
>  	for_each_kmem_cache_node(cachep, node, n) {
>  		check_irq_on();
> -		spin_lock_irq(&n->list_lock);
> +		raw_spin_lock_irq(&n->list_lock);
> 
>  		total_slabs += n->total_slabs;
>  		free_slabs += n->free_slabs;
> @@ -3910,7 +3910,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
>  		if (n->shared)
>  			shared_avail += n->shared->avail;
> 
> -		spin_unlock_irq(&n->list_lock);
> +		raw_spin_unlock_irq(&n->list_lock);
>  	}
>  	num_objs = total_slabs * cachep->num;
>  	active_slabs = total_slabs - free_slabs;
> diff --git a/mm/slab.h b/mm/slab.h
> index 0202a8c2f0d2..7a705e4228c8 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -750,7 +750,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
>   * The slab lists for all objects.
>   */
>  struct kmem_cache_node {
> +#ifdef CONFIG_SLAB
> +	raw_spinlock_t list_lock;
> +#else
>  	spinlock_t list_lock;
> +#endif
> 
>  #ifdef CONFIG_SLAB
>  	struct list_head slabs_partial;	/* partial list first, better asm code */
> -- 
> 2.35.3
> 

-- 
Thanks,
Hyeonggon
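
As background to the constraint described in the commit message above, here is a minimal, hypothetical sketch (not part of the patch; example_node, example_ipi_drain and example_drain_all are made-up names) of the pattern involved: an IPI callback broadcast with on_each_cpu() runs in hardirq context on every CPU, so any lock it takes must remain a genuinely non-sleeping lock even on PREEMPT_RT, which is what raw_spinlock_t provides and what lockdep's wait-context checking expects in that context.

#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/list.h>

/* Hypothetical structure standing in for kmem_cache_node's role. */
struct example_node {
	raw_spinlock_t lock;		/* initialized elsewhere with raw_spin_lock_init() */
	struct list_head objects;	/* protected by lock */
};

/*
 * IPI callback: executes in hardirq context on each CPU. A spinlock_t
 * here would be a sleeping lock on PREEMPT_RT, which is what lockdep's
 * "Invalid wait context" check flags; a raw_spinlock_t stays a spinning
 * lock in every configuration and is therefore valid in this context.
 */
static void example_ipi_drain(void *info)
{
	struct example_node *n = info;

	raw_spin_lock(&n->lock);
	/* ... move per-CPU cached objects back onto n->objects ... */
	raw_spin_unlock(&n->lock);
}

/* Broadcast the drain to all CPUs and wait for completion. */
static void example_drain_all(struct example_node *n)
{
	on_each_cpu(example_ipi_drain, n, 1);
}

On non-PREEMPT_RT builds both lock types behave as the same spinning lock, so the conversion in the patch is about satisfying the wait-context annotation rather than changing runtime behaviour.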