Hi,

On 1/24/2023 7:33 PM, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
>     bpf: Always use raw spinlock for hash bucket lock
>
> to the 5.15-stable tree which can be found at:
>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
>
> The filename of the patch is:
>     bpf-always-use-raw-spinlock-for-hash-bucket-lock.patch
> and it can be found in the queue-5.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@xxxxxxxxxxxxxxx> know about it.

Please drop it for v5.15. The fix depends on the bpf memory allocator [0],
which was merged in v6.1.

[0]: 7c8199e24fa0 bpf: Introduce any context BPF specific memory allocator.

>
>
>
> commit 9515b63fddd8c96797b0513c8d6509a9cc767611
> Author: Hou Tao <houtao1@xxxxxxxxxx>
> Date:   Wed Sep 21 15:38:26 2022 +0800
>
>     bpf: Always use raw spinlock for hash bucket lock
>
>     [ Upstream commit 1d8b82c613297f24354b4d750413a7456b5cd92c ]
>
>     For a non-preallocated hash map on RT kernel, regular spinlock instead
>     of raw spinlock is used for bucket lock. The reason is that on RT kernel
>     memory allocation is forbidden under atomic context and regular spinlock
>     is sleepable under RT.
>
>     Now hash map has been fully converted to use bpf_map_alloc, and there
>     will be no synchronous memory allocation for non-preallocated hash map,
>     so it is safe to always use raw spinlock for bucket lock on RT. So
>     removing the usage of htab_use_raw_lock() and updating the comments
>     accordingly.
>
>     Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
>     Link: https://lore.kernel.org/r/20220921073826.2365800-1-houtao@xxxxxxxxxxxxxxx
>     Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
>     Stable-dep-of: 9f907439dc80 ("bpf: hash map, avoid deadlock with suitable hash mask")
>     Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index e7f45a966e6b..ea2051a913fb 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -66,24 +66,16 @@
>   * In theory the BPF locks could be converted to regular spinlocks as well,
>   * but the bucket locks and percpu_freelist locks can be taken from
>   * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
> - * atomic contexts even on RT. These mechanisms require preallocated maps,
> - * so there is no need to invoke memory allocations within the lock held
> - * sections.
> - *
> - * BPF maps which need dynamic allocation are only used from (forced)
> - * thread context on RT and can therefore use regular spinlocks which in
> - * turn allows to invoke memory allocations from the lock held section.
> - *
> - * On a non RT kernel this distinction is neither possible nor required.
> - * spinlock maps to raw_spinlock and the extra code is optimized out by the
> - * compiler.
> + * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
> + * it is only safe to use raw spinlock for preallocated hash map on a RT kernel,
> + * because there is no memory allocation within the lock held sections. However
> + * after hash map was fully converted to use bpf_mem_alloc, there will be
> + * non-synchronous memory allocation for non-preallocated hash map, so it is
> + * safe to always use raw spinlock for bucket lock.
>   */
>  struct bucket {
>  	struct hlist_nulls_head head;
> -	union {
> -		raw_spinlock_t raw_lock;
> -		spinlock_t     lock;
> -	};
> +	raw_spinlock_t raw_lock;
>  };
>
>  #define HASHTAB_MAP_LOCK_COUNT 8
> @@ -132,26 +124,15 @@ static inline bool htab_is_prealloc(const struct bpf_htab *htab)
>  	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
>  }
>
> -static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
> -{
> -	return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
> -}
> -
>  static void htab_init_buckets(struct bpf_htab *htab)
>  {
>  	unsigned i;
>
>  	for (i = 0; i < htab->n_buckets; i++) {
>  		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
> -		if (htab_use_raw_lock(htab)) {
> -			raw_spin_lock_init(&htab->buckets[i].raw_lock);
> -			lockdep_set_class(&htab->buckets[i].raw_lock,
> +		raw_spin_lock_init(&htab->buckets[i].raw_lock);
> +		lockdep_set_class(&htab->buckets[i].raw_lock,
>  				  &htab->lockdep_key);
> -		} else {
> -			spin_lock_init(&htab->buckets[i].lock);
> -			lockdep_set_class(&htab->buckets[i].lock,
> -					  &htab->lockdep_key);
> -		}
>  		cond_resched();
>  	}
>  }
> @@ -161,28 +142,17 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
>  				   unsigned long *pflags)
>  {
>  	unsigned long flags;
> -	bool use_raw_lock;
>
>  	hash = hash & HASHTAB_MAP_LOCK_MASK;
>
> -	use_raw_lock = htab_use_raw_lock(htab);
> -	if (use_raw_lock)
> -		preempt_disable();
> -	else
> -		migrate_disable();
> +	preempt_disable();
>  	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
>  		__this_cpu_dec(*(htab->map_locked[hash]));
> -		if (use_raw_lock)
> -			preempt_enable();
> -		else
> -			migrate_enable();
> +		preempt_enable();
>  		return -EBUSY;
>  	}
>
> -	if (use_raw_lock)
> -		raw_spin_lock_irqsave(&b->raw_lock, flags);
> -	else
> -		spin_lock_irqsave(&b->lock, flags);
> +	raw_spin_lock_irqsave(&b->raw_lock, flags);
>  	*pflags = flags;
>
>  	return 0;
> @@ -192,18 +162,10 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
>  				      struct bucket *b, u32 hash,
>  				      unsigned long flags)
>  {
> -	bool use_raw_lock = htab_use_raw_lock(htab);
> -
>  	hash = hash & HASHTAB_MAP_LOCK_MASK;
> -	if (use_raw_lock)
> -		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
> -	else
> -		spin_unlock_irqrestore(&b->lock, flags);
> +	raw_spin_unlock_irqrestore(&b->raw_lock, flags);
>  	__this_cpu_dec(*(htab->map_locked[hash]));
> -	if (use_raw_lock)
> -		preempt_enable();
> -	else
> -		migrate_enable();
> +	preempt_enable();
>  }
>
>  static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
> .