Re: [PATCH bpf-next v3] bpf: fix NULL pointer dereference in bpf_get_local_storage() helper

Jiri Olsa <jolsa@xxxxxxxxxx> · Mon, 22 Mar 2021 21:47:01 +0100

On Sat, Mar 20, 2021 at 10:02:01AM -0700, Yonghong Song wrote:
> Jiri Olsa reported a bug ([1]) in kernel where cgroup local
> storage pointer may be NULL in bpf_get_local_storage() helper.
> There are two issues uncovered by this bug:
>   (1). kprobe or tracepoint prog incorrectly sets cgroup local storage
>        before prog run,
>   (2). due to change from preempt_disable to migrate_disable,
>        preemption is possible and percpu storage might be overwritten
>        by other tasks.
> 
> This issue (1) is fixed in [2]. This patch tried to address issue (2).
> The following shows how things can go wrong:
>   task 1:   bpf_cgroup_storage_set() for percpu local storage
>          preemption happens
>   task 2:   bpf_cgroup_storage_set() for percpu local storage
>          preemption happens
>   task 1:   run bpf program
> 
> task 1 will effectively use the percpu local storage setting by task 2
> which will be either NULL or incorrect ones.
> 
> Instead of just one common local storage per cpu, this patch fixed
> the issue by permitting 8 local storages per cpu and each local
> storage is identified by a task_struct pointer. This way, we
> allow at most 8 nested preemption between bpf_cgroup_storage_set()
> and bpf_cgroup_storage_unset(). The percpu local storage slot
> is released (calling bpf_cgroup_storage_unset()) by the same task
> after bpf program finished running.
> bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
> interface.
> 
> The patch is tested on top of [2] with reproducer in [1].
> Without this patch, kernel will emit error in 2-3 minutes.
> With this patch, after one hour, still no error.
> 
>  [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@xxxxxxxxxxxxxx/T
>  [2] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@xxxxxxxxxxxxxx/T

[1] and [2] are same link, you mean this one, right?
   05a68ce5fa51 bpf: Don't do bpf_cgroup_storage_set() for kuprobe/tp programs

I have troubles to apply this on bpf-next probably because
of dependencies, I'll wait for bpf-next is in sync with bpf
fixes.. or would you have a branch pushed out with this?

thanks for the fix,
jirka

> 
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Roman Gushchin <guro@xxxxxx>
> Signed-off-by: Yonghong Song <yhs@xxxxxx>
> ---
>  include/linux/bpf-cgroup.h | 57 ++++++++++++++++++++++++++++++++------
>  include/linux/bpf.h        | 22 ++++++++++++---
>  kernel/bpf/helpers.c       | 15 +++++++---
>  kernel/bpf/local_storage.c |  5 ++--
>  net/bpf/test_run.c         |  6 +++-
>  5 files changed, 86 insertions(+), 19 deletions(-)
> 
> Changelogs:
>   v2 -> v3:
>     . merge two patches as bpf_test_run() will have compilation error
>       and may fail with other changes.
>     . rewrite bpf_cgroup_storage_set() to be more inline with kernel
>       coding style.
>   v1 -> v2:
>     . fix compilation issues when CONFIG_CGROUPS is off or
>       CONFIG_CGROUP_BPF is off.
> 
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index c42e02b4d84b..6a29fe11485d 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
>  struct bpf_cgroup_storage;
>  struct ctl_table;
>  struct ctl_table_header;
> +struct task_struct;
>  
>  #ifdef CONFIG_CGROUP_BPF
>  
>  extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
>  #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
>  
> -DECLARE_PER_CPU(struct bpf_cgroup_storage*,
> -		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
> +#define BPF_CGROUP_STORAGE_NEST_MAX	8
> +
> +struct bpf_cgroup_storage_info {
> +	struct task_struct *task;
> +	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
> +};
> +
> +/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
> + * to use bpf cgroup storage simultaneously.
> + */
> +DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
> +		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
>  
>  #define for_each_cgroup_storage_type(stype) \
>  	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
> @@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
>  	return BPF_CGROUP_STORAGE_SHARED;
>  }
>  
> -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
> -					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
> +static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
> +					 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
>  {
>  	enum bpf_cgroup_storage_type stype;
> +	int i, err = 0;
> +
> +	preempt_disable();
> +	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
> +		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
> +			continue;
> +
> +		this_cpu_write(bpf_cgroup_storage_info[i].task, current);
> +		for_each_cgroup_storage_type(stype)
> +			this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
> +				       storage[stype]);
> +		goto out;
> +	}
> +	err = -EBUSY;
> +	WARN_ON_ONCE(1);
> +
> +out:
> +	preempt_enable();
> +	return err;
> +}
> +
> +static inline void bpf_cgroup_storage_unset(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
> +		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
> +			continue;
>  
> -	for_each_cgroup_storage_type(stype)
> -		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
> +		this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
> +		return;
> +	}
>  }
>  
>  struct bpf_cgroup_storage *
> @@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
>  	return -EINVAL;
>  }
>  
> -static inline void bpf_cgroup_storage_set(
> -	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
> +static inline int bpf_cgroup_storage_set(
> +	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
> +static inline void bpf_cgroup_storage_unset(void) {}
>  static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
>  					    struct bpf_map *map) { return 0; }
>  static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index a47285cd39c2..3a6ae69743ff 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1090,6 +1090,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
>  /* BPF program asks to set CN on the packet. */
>  #define BPF_RET_SET_CN						(1 << 0)
>  
> +/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
> + * if bpf_cgroup_storage_set() failed, the rest of programs
> + * will not execute. This should be a really rare scenario
> + * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
> + * preemptions all between bpf_cgroup_storage_set() and
> + * bpf_cgroup_storage_unset() on the same cpu.
> + */
>  #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
>  	({								\
>  		struct bpf_prog_array_item *_item;			\
> @@ -1102,10 +1109,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
>  		_array = rcu_dereference(array);			\
>  		_item = &_array->items[0];				\
>  		while ((_prog = READ_ONCE(_item->prog))) {		\
> -			bpf_cgroup_storage_set(_item->cgroup_storage);	\
> +			if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
> +				break;					\
>  			func_ret = func(_prog, ctx);			\
>  			_ret &= (func_ret & 1);				\
>  			*(ret_flags) |= (func_ret >> 1);			\
> +			bpf_cgroup_storage_unset();			\
>  			_item++;					\
>  		}							\
>  		rcu_read_unlock();					\
> @@ -1126,9 +1135,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
>  			goto _out;			\
>  		_item = &_array->items[0];		\
>  		while ((_prog = READ_ONCE(_item->prog))) {		\
> -			if (set_cg_storage)		\
> -				bpf_cgroup_storage_set(_item->cgroup_storage);	\
> -			_ret &= func(_prog, ctx);	\
> +			if (!set_cg_storage) {			\
> +				_ret &= func(_prog, ctx);	\
> +			} else {				\
> +				if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
> +					break;			\
> +				_ret &= func(_prog, ctx);	\
> +				bpf_cgroup_storage_unset();	\
> +			}				\
>  			_item++;			\
>  		}					\
>  _out:							\
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 074800226327..f306611c4ddf 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
>  };
>  
>  #ifdef CONFIG_CGROUP_BPF
> -DECLARE_PER_CPU(struct bpf_cgroup_storage*,
> -		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
> +DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
> +		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
>  
>  BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
>  {
> @@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
>  	 * verifier checks that its value is correct.
>  	 */
>  	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
> -	struct bpf_cgroup_storage *storage;
> +	struct bpf_cgroup_storage *storage = NULL;
>  	void *ptr;
> +	int i;
>  
> -	storage = this_cpu_read(bpf_cgroup_storage[stype]);
> +	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
> +		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
> +			continue;
> +
> +		storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
> +		break;
> +	}
>  
>  	if (stype == BPF_CGROUP_STORAGE_SHARED)
>  		ptr = &READ_ONCE(storage->buf)->data[0];
> diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
> index 2d4f9ac12377..bd11db9774c3 100644
> --- a/kernel/bpf/local_storage.c
> +++ b/kernel/bpf/local_storage.c
> @@ -9,10 +9,11 @@
>  #include <linux/slab.h>
>  #include <uapi/linux/btf.h>
>  
> -DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
> -
>  #ifdef CONFIG_CGROUP_BPF
>  
> +DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
> +	       bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
> +
>  #include "../cgroup/cgroup-internal.h"
>  
>  #define LOCAL_STORAGE_CREATE_FLAG_MASK					\
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index 0abdd67f44b1..4aabf71cd95d 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
>  
>  	bpf_test_timer_enter(&t);
>  	do {
> -		bpf_cgroup_storage_set(storage);
> +		ret = bpf_cgroup_storage_set(storage);
> +		if (ret)
> +			break;
>  
>  		if (xdp)
>  			*retval = bpf_prog_run_xdp(prog, ctx);
>  		else
>  			*retval = BPF_PROG_RUN(prog, ctx);
> +
> +		bpf_cgroup_storage_unset();
>  	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
>  	bpf_test_timer_leave(&t);
>  
> -- 
> 2.30.2
>