Re: [PATCH bpf-next 3/5] bpf: Add rcu btf_type_tag verifier support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Nov 3, 2022 at 8:21 AM Yonghong Song <yhs@xxxxxx> wrote:
>
> A new bpf_type_flag MEM_RCU is added to indicate a PTR_TO_BTF_ID
> object access needing rcu_read_lock protection. The rcu protection
> is not needed for non-sleepable program. So various verification
> checking is only done for sleepable programs. In particular, only
> the following insns can be inside bpf_rcu_read_lock() region:
>   - any non call insns except BPF_ABS/BPF_IND
>   - non sleepable helpers and kfuncs.
> Also, bpf_*_storage_get() helper's 5th hidden argument (for memory
> allocation flag) should be GFP_ATOMIC.
>
> If a pointer (PTR_TO_BTF_ID) is marked as rcu, then any use of
> this pointer and the load which gets this pointer needs to be
> protected by bpf_rcu_read_lock(). The following shows a couple
> of examples:
>   struct task_struct {
>         ...
>         struct task_struct __rcu        *real_parent;
>         struct css_set __rcu            *cgroups;
>         ...
>   };
>   struct css_set {
>         ...
>         struct cgroup *dfl_cgrp;
>         ...
>   }
>   ...
>   task = bpf_get_current_task_btf();
>   cgroups = task->cgroups;
>   dfl_cgroup = cgroups->dfl_cgrp;
>   ... using dfl_cgroup ...
>
> The bpf_rcu_read_lock/unlock() should be added like below to
> avoid verification failures.
>   task = bpf_get_current_task_btf();
>   bpf_rcu_read_lock();
>   cgroups = task->cgroups;
>   dfl_cgroup = cgroups->dfl_cgrp;
>   bpf_rcu_read_unlock();
>   ... using dfl_cgroup ...
>
> The following is another example for task->real_parent.
>   task = bpf_get_current_task_btf();
>   bpf_rcu_read_lock();
>   real_parent = task->real_parent;
>   ... bpf_task_storage_get(&map, real_parent, 0, 0);
>   bpf_rcu_read_unlock();
>
> There is another case observed in selftest bpf_iter_ipv6_route.c:
>   struct fib6_info *rt = ctx->rt;
>   ...
>   fib6_nh = &rt->fib6_nh[0]; // Not rcu protected
>   ...
>   if (rt->nh)
>     fib6_nh = &nh->nh_info->fib6_nh; // rcu protected
>   ...
>   ... using fib6_nh ...
> Currently verification will fail with
>   same insn cannot be used with different pointers
> since the use of fib6_nh is tag with rcu in one path
> but not in the other path. The above use case is a valid
> one so the verifier is changed to ignore MEM_RCU type tag
> in such cases.
>
> Signed-off-by: Yonghong Song <yhs@xxxxxx>
> ---
>  include/linux/bpf.h          |   3 +
>  include/linux/bpf_verifier.h |   1 +
>  kernel/bpf/btf.c             |  11 +++
>  kernel/bpf/verifier.c        | 126 ++++++++++++++++++++++++++++++++---
>  4 files changed, 133 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index a9bda4c91fc7..f0d973c8d227 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -458,6 +458,9 @@ enum bpf_type_flag {
>         /* Size is known at compile time. */
>         MEM_FIXED_SIZE          = BIT(10 + BPF_BASE_TYPE_BITS),
>
> +       /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
> +       MEM_RCU                 = BIT(11 + BPF_BASE_TYPE_BITS),
> +
>         __BPF_TYPE_FLAG_MAX,
>         __BPF_TYPE_LAST_FLAG    = __BPF_TYPE_FLAG_MAX - 1,
>  };
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 1a32baa78ce2..d4e56f5a4b20 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -324,6 +324,7 @@ struct bpf_verifier_state {
>         u32 insn_idx;
>         u32 curframe;
>         u32 active_spin_lock;
> +       u32 active_rcu_lock;
>         bool speculative;
>
>         /* first and last insn idx of this verifier state */
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 35c07afac924..293d224a7f53 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -5527,6 +5527,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>                         info->reg_type |= MEM_USER;
>                 if (strcmp(tag_value, "percpu") == 0)
>                         info->reg_type |= MEM_PERCPU;
> +               if (strcmp(tag_value, "rcu") == 0)
> +                       info->reg_type |= MEM_RCU;
>         }
>
>         /* skip modifiers */
> @@ -5765,6 +5767,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
>                                 /* check __percpu tag */
>                                 if (strcmp(tag_value, "percpu") == 0)
>                                         tmp_flag = MEM_PERCPU;
> +                               /* check __rcu tag */
> +                               if (strcmp(tag_value, "rcu") == 0)
> +                                       tmp_flag = MEM_RCU;
>                         }
>
>                         stype = btf_type_skip_modifiers(btf, mtype->type, &id);
> @@ -6560,6 +6565,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
>                 return -EINVAL;
>         }
>
> +       if (sleepable && env->cur_state->active_rcu_lock) {
> +               bpf_log(log, "kernel function %s is sleepable within rcu_read_lock region\n",
> +                       func_name);
> +               return -EINVAL;
> +       }
> +
>         if (kfunc_meta && ref_obj_id)
>                 kfunc_meta->ref_obj_id = ref_obj_id;
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 82c07fe0bfb1..3c5afd3bc216 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -188,6 +188,9 @@ struct bpf_verifier_stack_elem {
>                                           POISON_POINTER_DELTA))
>  #define BPF_MAP_PTR(X)         ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
>
> +/* Using insn->off = BPF_STORAGE_GET_CALL to mark bpf_*_storage_get() helper calls. */
> +#define BPF_STORAGE_GET_CALL   1
> +
>  static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
>  static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
>
> @@ -511,6 +514,22 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
>         return func_id == BPF_FUNC_dynptr_data;
>  }
>
> +static bool is_storage_get_function(enum bpf_func_id func_id)
> +{
> +       return func_id == BPF_FUNC_sk_storage_get ||
> +              func_id == BPF_FUNC_inode_storage_get ||
> +              func_id == BPF_FUNC_task_storage_get ||
> +              func_id == BPF_FUNC_cgrp_storage_get;
> +}
> +
> +static bool is_sleepable_function(enum bpf_func_id func_id)
> +{
> +       return func_id == BPF_FUNC_copy_from_user ||
> +              func_id == BPF_FUNC_copy_from_user_task ||
> +              func_id == BPF_FUNC_ima_inode_hash ||
> +              func_id == BPF_FUNC_ima_file_hash;
> +}

This is a bit concerning that these are in two different places in the kernel.
We expose a helper based on prog->aux->sleepable in different places and
it's worth doing some refactoring to keep all this logic in a single place.

> +
>  static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
>                                         const struct bpf_map *map)
>  {
> @@ -583,6 +602,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
>                 strncpy(prefix, "percpu_", 32);
>         if (type & PTR_UNTRUSTED)
>                 strncpy(prefix, "untrusted_", 32);
> +       if (type & MEM_RCU)
> +               strncpy(prefix, "rcu_", 32);
>
>         snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
>                  prefix, str[base_type(type)], postfix);
> @@ -1201,6 +1222,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
>         dst_state->speculative = src->speculative;
>         dst_state->curframe = src->curframe;
>         dst_state->active_spin_lock = src->active_spin_lock;
> +       dst_state->active_rcu_lock = src->active_rcu_lock;
>         dst_state->branches = src->branches;
>         dst_state->parent = src->parent;
>         dst_state->first_insn_idx = src->first_insn_idx;
> @@ -4536,6 +4558,14 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
>                 return -EACCES;
>         }
>
> +       if ((reg->type & MEM_RCU) && env->prog->aux->sleepable &&
> +           !env->cur_state->active_rcu_lock) {
> +               verbose(env,
> +                       "R%d is ptr_%s access rcu-protected memory with off=%d, not in rcu_read_lock region\n",
> +                       regno, tname, off);
> +               return -EACCES;
> +       }
> +
>         if (env->ops->btf_struct_access) {
>                 ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
>                                                   off, size, atype, &btf_id, &flag);
> @@ -4552,6 +4582,14 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
>         if (ret < 0)
>                 return ret;
>
> +       if ((flag & MEM_RCU) && env->prog->aux->sleepable &&
> +           !env->cur_state->active_rcu_lock) {
> +               verbose(env,
> +                       "R%d is rcu dereference ptr_%s with off=%d, not in rcu_read_lock region\n",
> +                       regno, tname, off);
> +               return -EACCES;
> +       }
> +
>         /* If this is an untrusted pointer, all pointers formed by walking it
>          * also inherit the untrusted flag.
>          */
> @@ -5684,7 +5722,12 @@ static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
>  static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
>  static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
>  static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
> -static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
> +static const struct bpf_reg_types btf_ptr_types = {
> +       .types = {
> +               PTR_TO_BTF_ID,
> +               PTR_TO_BTF_ID | MEM_RCU,
> +       }
> +};
>  static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
>  static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
>  static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
> @@ -5758,6 +5801,20 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
>         if (arg_type & PTR_MAYBE_NULL)
>                 type &= ~PTR_MAYBE_NULL;
>
> +       /* If the reg type is marked as MEM_RCU, ensure the usage is in the rcu_read_lock
> +        * region, and remove MEM_RCU from the type since the arg_type won't encode
> +        * MEM_RCU.
> +        */
> +       if (type & MEM_RCU) {
> +               if (env->prog->aux->sleepable && !env->cur_state->active_rcu_lock) {
> +                       verbose(env,
> +                               "R%d is arg type %s needs rcu protection\n",
> +                               regno, reg_type_str(env, reg->type));
> +                       return -EACCES;
> +               }
> +               type &= ~MEM_RCU;
> +       }
> +
>         for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
>                 expected = compatible->types[i];
>                 if (expected == NOT_INIT)
> @@ -5774,7 +5831,8 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
>         return -EACCES;
>
>  found:
> -       if (reg->type == PTR_TO_BTF_ID) {
> +       /* reg is already protected by rcu_read_lock(). Peel off MEM_RCU from reg->type. */
> +       if ((reg->type & ~MEM_RCU) == PTR_TO_BTF_ID) {
>                 /* For bpf_sk_release, it needs to match against first member
>                  * 'struct sock_common', hence make an exception for it. This
>                  * allows bpf_sk_release to work for multiple socket types.
> @@ -5850,6 +5908,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
>          * fixed offset.
>          */
>         case PTR_TO_BTF_ID:
> +       case PTR_TO_BTF_ID | MEM_RCU:
>                 /* When referenced PTR_TO_BTF_ID is passed to release function,
>                  * it's fixed offset must be 0. In the other cases, fixed offset
>                  * can be non-zero.
> @@ -7289,6 +7348,26 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>         }
>
>         meta.func_id = func_id;
> +
> +       if (func_id == BPF_FUNC_rcu_read_lock)
> +               env->cur_state->active_rcu_lock++;
> +       if (func_id == BPF_FUNC_rcu_read_unlock) {
> +               if (env->cur_state->active_rcu_lock == 0) {
> +                       verbose(env, "missing bpf_rcu_read_lock\n");
> +                       return -EINVAL;
> +               }
> +
> +               env->cur_state->active_rcu_lock--;
> +       }
> +       if (env->cur_state->active_rcu_lock) {
> +               if (is_sleepable_function(func_id))
> +                       verbose(env, "sleepable helper %s#%din rcu_read_lock region\n",
> +                               func_id_name(func_id), func_id);
> +
> +               if (env->prog->aux->sleepable && is_storage_get_function(func_id))
> +                       insn->off = BPF_STORAGE_GET_CALL;
> +       }
> +
>         /* check args */
>         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
>                 err = check_func_arg(env, i, &meta, fn);
> @@ -10470,6 +10549,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
>                 return -EINVAL;
>         }
>
> +       if (env->prog->aux->sleepable && env->cur_state->active_rcu_lock) {
> +               verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
> +               return -EINVAL;
> +       }
> +
>         if (regs[ctx_reg].type != PTR_TO_CTX) {
>                 verbose(env,
>                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
> @@ -11734,6 +11818,9 @@ static bool states_equal(struct bpf_verifier_env *env,
>         if (old->active_spin_lock != cur->active_spin_lock)
>                 return false;
>
> +       if (old->active_rcu_lock != cur->active_rcu_lock)
> +               return false;
> +
>         /* for states to be equal callsites have to be the same
>          * and all frame states need to be equivalent
>          */
> @@ -12141,6 +12228,11 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
>                                !reg_type_mismatch_ok(prev));
>  }
>
> +static bool reg_type_mismatch_ignore_rcu(enum bpf_reg_type src, enum bpf_reg_type prev)
> +{
> +       return reg_type_mismatch(src & ~MEM_RCU, prev & ~MEM_RCU);
> +}
> +
>  static int do_check(struct bpf_verifier_env *env)
>  {
>         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
> @@ -12266,6 +12358,18 @@ static int do_check(struct bpf_verifier_env *env)
>
>                         prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
>
> +                       /* For NOT_INIT *prev_src_type, ignore rcu type tag.
> +                        * For code like below,
> +                        *   struct foo *f;
> +                        *   if (...)
> +                        *     f = ...; // f with MEM_RCU type tag.
> +                        *   else
> +                        *     f = ...; // f without MEM_RCU type tag.
> +                        *   ... f ...  // Here f could be with/without MEM_RCU
> +                        *
> +                        * It is safe to ignore MEM_RCU type tag here since
> +                        * base types are the same.
> +                        */
>                         if (*prev_src_type == NOT_INIT) {
>                                 /* saw a valid insn
>                                  * dst_reg = *(u32 *)(src_reg + off)
> @@ -12273,7 +12377,7 @@ static int do_check(struct bpf_verifier_env *env)
>                                  */
>                                 *prev_src_type = src_reg_type;
>
> -                       } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
> +                       } else if (reg_type_mismatch_ignore_rcu(src_reg_type, *prev_src_type)) {
>                                 /* ABuser program is trying to use the same insn
>                                  * dst_reg = *(u32*) (src_reg + off)
>                                  * with different pointer types:
> @@ -12412,6 +12516,11 @@ static int do_check(struct bpf_verifier_env *env)
>                                         return -EINVAL;
>                                 }
>
> +                               if (env->cur_state->active_rcu_lock) {
> +                                       verbose(env, "bpf_rcu_read_unlock is missing\n");
> +                                       return -EINVAL;
> +                               }
> +
>                                 /* We must do check_reference_leak here before
>                                  * prepare_func_exit to handle the case when
>                                  * state->curframe > 0, it may be a callback
> @@ -13499,6 +13608,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
>                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
>                         break;
>                 case PTR_TO_BTF_ID:
> +               case PTR_TO_BTF_ID | MEM_RCU:
>                 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
>                         if (type == BPF_READ) {
>                                 insn->code = BPF_LDX | BPF_PROBE_MEM |
> @@ -14148,11 +14258,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>                         goto patch_call_imm;
>                 }
>
> -               if (insn->imm == BPF_FUNC_task_storage_get ||
> -                   insn->imm == BPF_FUNC_sk_storage_get ||
> -                   insn->imm == BPF_FUNC_inode_storage_get ||
> -                   insn->imm == BPF_FUNC_cgrp_storage_get) {
> -                       if (env->prog->aux->sleepable)
> +               if (is_storage_get_function(insn->imm)) {
> +                       if (env->prog->aux->sleepable && insn->off) {

I would recommend explicitly checking if insn->off == BPF_STORAGE_GET_CALL.

Also, your comment says you are marking BPF_STORAGE_GET_CALL but this is only
set when the call is in a classical RCU critical section and in a
sleepable program. The
The name and the comment above should reflect that.

BPF_STORAGE_GET_CALL_ATOMIC or something.


> +                               insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
> +                               insn->off = 0;
> +                       } else if (env->prog->aux->sleepable)
>                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
>                         else
>                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
> --
> 2.30.2
>



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux