Hi,

On 1/24/2023 7:33 PM, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
>     bpf: Always use raw spinlock for hash bucket lock
>
> to the 5.15-stable tree which can be found at:
>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
>
> The filename of the patch is:
>     bpf-always-use-raw-spinlock-for-hash-bucket-lock.patch
> and it can be found in the queue-5.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@xxxxxxxxxxxxxxx> know about it.

Please drop it for v5.15. The fix depends on the bpf memory allocator [0],
which was merged in v6.1.

[0]: 7c8199e24fa0 bpf: Introduce any context BPF specific memory allocator.

>
>
>
> commit 9515b63fddd8c96797b0513c8d6509a9cc767611
> Author: Hou Tao <houtao1@xxxxxxxxxx>
> Date:   Wed Sep 21 15:38:26 2022 +0800
>
>     bpf: Always use raw spinlock for hash bucket lock
>
>     [ Upstream commit 1d8b82c613297f24354b4d750413a7456b5cd92c ]
>
>     For a non-preallocated hash map on RT kernel, regular spinlock instead
>     of raw spinlock is used for bucket lock. The reason is that on RT kernel
>     memory allocation is forbidden under atomic context and regular spinlock
>     is sleepable under RT.
>
>     Now hash map has been fully converted to use bpf_map_alloc, and there
>     will be no synchronous memory allocation for non-preallocated hash map,
>     so it is safe to always use raw spinlock for bucket lock on RT. So
>     removing the usage of htab_use_raw_lock() and updating the comments
>     accordingly.
>
>     Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
>     Link: https://lore.kernel.org/r/20220921073826.2365800-1-houtao@xxxxxxxxxxxxxxx
>     Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
>     Stable-dep-of: 9f907439dc80 ("bpf: hash map, avoid deadlock with suitable hash mask")
>     Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index e7f45a966e6b..ea2051a913fb 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -66,24 +66,16 @@
>   * In theory the BPF locks could be converted to regular spinlocks as well,
>   * but the bucket locks and percpu_freelist locks can be taken from
>   * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
> - * atomic contexts even on RT. These mechanisms require preallocated maps,
> - * so there is no need to invoke memory allocations within the lock held
> - * sections.
> - *
> - * BPF maps which need dynamic allocation are only used from (forced)
> - * thread context on RT and can therefore use regular spinlocks which in
> - * turn allows to invoke memory allocations from the lock held section.
> - *
> - * On a non RT kernel this distinction is neither possible nor required.
> - * spinlock maps to raw_spinlock and the extra code is optimized out by the
> - * compiler.
> + * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
> + * it is only safe to use raw spinlock for preallocated hash map on a RT kernel,
> + * because there is no memory allocation within the lock held sections. However
> + * after hash map was fully converted to use bpf_mem_alloc, there will be
> + * non-synchronous memory allocation for non-preallocated hash map, so it is
> + * safe to always use raw spinlock for bucket lock.
>   */
>  struct bucket {
>  	struct hlist_nulls_head head;
> -	union {
> -		raw_spinlock_t raw_lock;
> -		spinlock_t     lock;
> -	};
> +	raw_spinlock_t raw_lock;
>  };
>
>  #define HASHTAB_MAP_LOCK_COUNT 8
> @@ -132,26 +124,15 @@ static inline bool htab_is_prealloc(const struct bpf_htab *htab)
>  	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
>  }
>
> -static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
> -{
> -	return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
> -}
> -
>  static void htab_init_buckets(struct bpf_htab *htab)
>  {
>  	unsigned i;
>
>  	for (i = 0; i < htab->n_buckets; i++) {
>  		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
> -		if (htab_use_raw_lock(htab)) {
> -			raw_spin_lock_init(&htab->buckets[i].raw_lock);
> -			lockdep_set_class(&htab->buckets[i].raw_lock,
> +		raw_spin_lock_init(&htab->buckets[i].raw_lock);
> +		lockdep_set_class(&htab->buckets[i].raw_lock,
>  				  &htab->lockdep_key);
> -		} else {
> -			spin_lock_init(&htab->buckets[i].lock);
> -			lockdep_set_class(&htab->buckets[i].lock,
> -					  &htab->lockdep_key);
> -		}
>  		cond_resched();
>  	}
>  }
> @@ -161,28 +142,17 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
>  				   unsigned long *pflags)
>  {
>  	unsigned long flags;
> -	bool use_raw_lock;
>
>  	hash = hash & HASHTAB_MAP_LOCK_MASK;
>
> -	use_raw_lock = htab_use_raw_lock(htab);
> -	if (use_raw_lock)
> -		preempt_disable();
> -	else
> -		migrate_disable();
> +	preempt_disable();
>  	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
>  		__this_cpu_dec(*(htab->map_locked[hash]));
> -		if (use_raw_lock)
> -			preempt_enable();
> -		else
> -			migrate_enable();
> +		preempt_enable();
>  		return -EBUSY;
>  	}
>
> -	if (use_raw_lock)
> -		raw_spin_lock_irqsave(&b->raw_lock, flags);
> -	else
> -		spin_lock_irqsave(&b->lock, flags);
> +	raw_spin_lock_irqsave(&b->raw_lock, flags);
>  	*pflags = flags;
>
>  	return 0;
> @@ -192,18 +162,10 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
>  				      struct bucket *b, u32 hash,
>  				      unsigned long flags)
>  {
> -	bool use_raw_lock = htab_use_raw_lock(htab);
> -
>  	hash = hash & HASHTAB_MAP_LOCK_MASK;
> -	if (use_raw_lock)
> -		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
> -	else
> -		spin_unlock_irqrestore(&b->lock, flags);
> +	raw_spin_unlock_irqrestore(&b->raw_lock, flags);
>  	__this_cpu_dec(*(htab->map_locked[hash]));
> -	if (use_raw_lock)
> -		preempt_enable();
> -	else
> -		migrate_enable();
> +	preempt_enable();
>  }
>
>  static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
> .