On 4/11/23 15:08, Qi Zheng wrote:
> The list_lock can be held in the critical section of a
> raw_spinlock, and then lockdep will complain about it
> like below:
>
> =============================
> [ BUG: Invalid wait context ]
> 6.3.0-rc6-next-20230411 #7 Not tainted
> -----------------------------
> swapper/0/1 is trying to lock:
> ffff888100055418 (&n->list_lock){....}-{3:3}, at: ___slab_alloc+0x73d/0x1330
> other info that might help us debug this:
> context-{5:5}
> 2 locks held by swapper/0/1:
>  #0: ffffffff824e8160 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic+0x22/0x2d0
>  #1: ffff888136bede50 (&ACCESS_PRIVATE(rtpcp, lock)){....}-{2:2}, at: cblist_init_generic+0x232/0x2d0
> stack backtrace:
> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc6-next-20230411 #7
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
> Call Trace:
>  <TASK>
>  dump_stack_lvl+0x77/0xc0
>  __lock_acquire+0xa65/0x2950
>  ? arch_stack_walk+0x65/0xf0
>  ? arch_stack_walk+0x65/0xf0
>  ? unwind_next_frame+0x602/0x8d0
>  lock_acquire+0xe0/0x300
>  ? ___slab_alloc+0x73d/0x1330
>  ? find_usage_forwards+0x39/0x50
>  ? check_irq_usage+0x162/0xa70
>  ? __bfs+0x10c/0x2c0
>  _raw_spin_lock_irqsave+0x4f/0x90
>  ? ___slab_alloc+0x73d/0x1330
>  ___slab_alloc+0x73d/0x1330
>  ? fill_pool+0x16b/0x2a0
>  ? look_up_lock_class+0x5d/0x160
>  ? register_lock_class+0x48/0x500
>  ? __lock_acquire+0xabc/0x2950
>  ? fill_pool+0x16b/0x2a0
>  kmem_cache_alloc+0x358/0x3b0
>  ? __lock_acquire+0xabc/0x2950
>  fill_pool+0x16b/0x2a0
>  ? __debug_object_init+0x292/0x560
>  ? lock_acquire+0xe0/0x300
>  ? cblist_init_generic+0x232/0x2d0
>  __debug_object_init+0x2c/0x560
>  cblist_init_generic+0x147/0x2d0
>  rcu_init_tasks_generic+0x15/0x190
>  kernel_init_freeable+0x6e/0x3e0
>  ? rest_init+0x1e0/0x1e0
>  kernel_init+0x1b/0x1d0
>  ? rest_init+0x1e0/0x1e0
>  ret_from_fork+0x1f/0x30
>  </TASK>
>
> fill_pool() can only be called in a !PREEMPT_RT kernel, or in
> preemptible context on a PREEMPT_RT kernel, so the above warning
> is not a real issue. Still, it's better to annotate
> kmem_cache_node->list_lock as a raw_spinlock to get rid of such
> issues.

+ CC some RT and RCU people

AFAIK raw_spinlock is not just an annotation: on RT it changes the
implementation from a preemptible mutex to an actual spin lock, so it
would be rather unfortunate to do that just to silence a spurious
warning. Can it be fixed in a better way?
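For reference, the shape of the problem (a minimal sketch, not code
taken from the kernel; both lock names are made up): on RT, spinlock_t
is backed by a sleeping rtmutex, while raw_spinlock_t keeps spinning
with preemption disabled, so nesting the two this way is exactly what
lockdep reports as an invalid wait context:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(outer_raw_lock);	/* plays the role of rtpcp->lock */
static DEFINE_SPINLOCK(inner_lock);		/* plays the role of n->list_lock */

static void invalid_wait_context(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&outer_raw_lock, flags);
	/*
	 * On RT, spin_lock() may sleep, but we are inside a raw
	 * (non-preemptible) critical section here, hence the splat
	 * quoted above.
	 */
	spin_lock(&inner_lock);
	spin_unlock(&inner_lock);
	raw_spin_unlock_irqrestore(&outer_raw_lock, flags);
}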
> Reported-by: Zhao Gongyi <zhaogongyi@xxxxxxxxxxxxx>
> Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
> ---
>  mm/slab.h |  4 ++--
>  mm/slub.c | 66 +++++++++++++++++++++++++++----------------------
>  2 files changed, 35 insertions(+), 35 deletions(-)
>
> diff --git a/mm/slab.h b/mm/slab.h
> index f01ac256a8f5..43f3436d13b4 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -723,8 +723,9 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
>   * The slab lists for all objects.
>   */
>  struct kmem_cache_node {
> -#ifdef CONFIG_SLAB
> 	raw_spinlock_t list_lock;
> +
> +#ifdef CONFIG_SLAB
> 	struct list_head slabs_partial;	/* partial list first, better asm code */
> 	struct list_head slabs_full;
> 	struct list_head slabs_free;
> @@ -740,7 +741,6 @@ struct kmem_cache_node {
>  #endif
>
>  #ifdef CONFIG_SLUB
> -	spinlock_t list_lock;
> 	unsigned long nr_partial;
> 	struct list_head partial;
>  #ifdef CONFIG_SLUB_DEBUG
> diff --git a/mm/slub.c b/mm/slub.c
> index c87628cd8a9a..e66a35643624 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1331,7 +1331,7 @@ static void add_full(struct kmem_cache *s,
> 	if (!(s->flags & SLAB_STORE_USER))
> 		return;
>
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
> 	list_add(&slab->slab_list, &n->full);
>  }
>
> @@ -1340,7 +1340,7 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
> 	if (!(s->flags & SLAB_STORE_USER))
> 		return;
>
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
> 	list_del(&slab->slab_list);
>  }
>
> @@ -2113,14 +2113,14 @@ __add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
>  static inline void add_partial(struct kmem_cache_node *n,
> 				struct slab *slab, int tail)
>  {
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
> 	__add_partial(n, slab, tail);
>  }
>
>  static inline void remove_partial(struct kmem_cache_node *n,
> 					struct slab *slab)
>  {
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
> 	list_del(&slab->slab_list);
> 	n->nr_partial--;
>  }
> @@ -2136,7 +2136,7 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
>  {
> 	void *object;
>
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>
> 	object = slab->freelist;
> 	slab->freelist = get_freepointer(s, object);
> @@ -2181,7 +2181,7 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s,
> 		 */
> 		return NULL;
>
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>
> 	if (slab->inuse == slab->objects)
> 		add_full(s, n, slab);
> @@ -2189,7 +2189,7 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s,
> 		add_partial(n, slab, DEACTIVATE_TO_HEAD);
>
> 	inc_slabs_node(s, nid, slab->objects);
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>
> 	return object;
>  }
> @@ -2208,7 +2208,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
> 	unsigned long counters;
> 	struct slab new;
>
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>
> 	/*
> 	 * Zap the freelist and set the frozen bit.
> @@ -2267,7 +2267,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
> 	if (!n || !n->nr_partial)
> 		return NULL;
>
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
> 	list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
> 		void *t;
>
> @@ -2304,7 +2304,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
>  #endif
>
> 	}
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 	return object;
>  }
>
> @@ -2548,7 +2548,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> 		 * Taking the spinlock removes the possibility that
> 		 * acquire_slab() will see a slab that is frozen
> 		 */
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
> 	} else {
> 		mode = M_FULL_NOLIST;
> 	}
> @@ -2559,14 +2559,14 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> 				new.freelist, new.counters,
> 				"unfreezing slab")) {
> 		if (mode == M_PARTIAL)
> -			spin_unlock_irqrestore(&n->list_lock, flags);
> +			raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 		goto redo;
> 	}
>
>
> 	if (mode == M_PARTIAL) {
> 		add_partial(n, slab, tail);
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 		stat(s, tail);
> 	} else if (mode == M_FREE) {
> 		stat(s, DEACTIVATE_EMPTY);
> @@ -2594,10 +2594,10 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
> 		n2 = get_node(s, slab_nid(slab));
> 		if (n != n2) {
> 			if (n)
> -				spin_unlock_irqrestore(&n->list_lock, flags);
> +				raw_spin_unlock_irqrestore(&n->list_lock, flags);
>
> 			n = n2;
> -			spin_lock_irqsave(&n->list_lock, flags);
> +			raw_spin_lock_irqsave(&n->list_lock, flags);
> 		}
>
> 		do {
> @@ -2626,7 +2626,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
> 	}
>
> 	if (n)
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
>
> 	while (slab_to_discard) {
> 		slab = slab_to_discard;
> @@ -2951,10 +2951,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
> 	unsigned long x = 0;
> 	struct slab *slab;
>
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
> 	list_for_each_entry(slab, &n->partial, slab_list)
> 		x += get_count(slab);
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 	return x;
>  }
>  #endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
> @@ -3515,7 +3515,7 @@ static noinline void free_to_partial_list(
> 	if (s->flags & SLAB_STORE_USER)
> 		handle = set_track_prepare();
>
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>
> 	if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
> 		void *prior = slab->freelist;
> @@ -3554,7 +3554,7 @@ static noinline void free_to_partial_list(
> 			dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
> 	}
>
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>
> 	if (slab_free) {
> 		stat(s, FREE_SLAB);
> @@ -3594,7 +3594,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>
> 	do {
> 		if (unlikely(n)) {
> -			spin_unlock_irqrestore(&n->list_lock, flags);
> +			raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 			n = NULL;
> 		}
> 		prior = slab->freelist;
> @@ -3626,7 +3626,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
> 		 * Otherwise the list_lock will synchronize with
> 		 * other processors updating the list of slabs.
> 		 */
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
>
> 		}
> 	}
> @@ -3668,7 +3668,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
> 		add_partial(n, slab, DEACTIVATE_TO_TAIL);
> 		stat(s, FREE_ADD_PARTIAL);
> 	}
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 	return;
>
>  slab_empty:
> @@ -3683,7 +3683,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
> 		remove_full(s, n, slab);
> 	}
>
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 	stat(s, FREE_SLAB);
> 	discard_slab(s, slab);
>  }
> @@ -4180,7 +4180,7 @@ static void
>  init_kmem_cache_node(struct kmem_cache_node *n)
>  {
> 	n->nr_partial = 0;
> -	spin_lock_init(&n->list_lock);
> +	raw_spin_lock_init(&n->list_lock);
> 	INIT_LIST_HEAD(&n->partial);
>  #ifdef CONFIG_SLUB_DEBUG
> 	atomic_long_set(&n->nr_slabs, 0);
> @@ -4576,7 +4576,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
> 	struct slab *slab, *h;
>
> 	BUG_ON(irqs_disabled());
> -	spin_lock_irq(&n->list_lock);
> +	raw_spin_lock_irq(&n->list_lock);
> 	list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
> 		if (!slab->inuse) {
> 			remove_partial(n, slab);
> @@ -4586,7 +4586,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
> 			"Objects remaining in %s on __kmem_cache_shutdown()");
> 		}
> 	}
> -	spin_unlock_irq(&n->list_lock);
> +	raw_spin_unlock_irq(&n->list_lock);
>
> 	list_for_each_entry_safe(slab, h, &discard, slab_list)
> 		discard_slab(s, slab);
> @@ -4790,7 +4790,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
> 		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
> 			INIT_LIST_HEAD(promote + i);
>
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
>
> 		/*
> 		 * Build lists of slabs to discard or promote.
> @@ -4822,7 +4822,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
> 		for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
> 			list_splice(promote + i, &n->partial);
>
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
>
> 		/* Release empty slabs */
> 		list_for_each_entry_safe(slab, t, &discard, slab_list)
> @@ -5147,7 +5147,7 @@ static int validate_slab_node(struct kmem_cache *s,
> 	struct slab *slab;
> 	unsigned long flags;
>
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>
> 	list_for_each_entry(slab, &n->partial, slab_list) {
> 		validate_slab(s, slab, obj_map);
> @@ -5173,7 +5173,7 @@ static int validate_slab_node(struct kmem_cache *s,
> 	}
>
>  out:
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 	return count;
>  }
>
> @@ -6399,12 +6399,12 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
> 		if (!atomic_long_read(&n->nr_slabs))
> 			continue;
>
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
> 		list_for_each_entry(slab, &n->partial, slab_list)
> 			process_slab(t, s, slab, alloc, obj_map);
> 		list_for_each_entry(slab, &n->full, slab_list)
> 			process_slab(t, s, slab, alloc, obj_map);
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
> 	}
>
> 	/* Sort locations by count */
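Just to illustrate the direction, not a concrete proposal: the usual
way to avoid this class of warning is to move the allocation out of
the raw critical section entirely, rather than degrade list_lock on
RT. A sketch under that assumption (some_cache, some_raw_lock and
install_object() are all hypothetical names):

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(some_raw_lock);
static void *published_obj;

static void install_object(void *obj)
{
	published_obj = obj;	/* stand-in for the real publish step */
}

static int init_with_prealloc(struct kmem_cache *some_cache)
{
	unsigned long flags;
	void *obj;

	/*
	 * Allocate while still preemptible; this may take sleeping
	 * locks on RT, which is fine outside the raw section.
	 */
	obj = kmem_cache_alloc(some_cache, GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	raw_spin_lock_irqsave(&some_raw_lock, flags);
	/*
	 * Only publish the preallocated object under the raw lock;
	 * no allocation, hence no sleeping lock, in here.
	 */
	install_object(obj);
	raw_spin_unlock_irqrestore(&some_raw_lock, flags);

	return 0;
}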