Re: [PATCH bpf-next 3/5] bpf: Add rcu btf_type_tag verifier support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 11/3/22 7:43 AM, KP Singh wrote:
On Thu, Nov 3, 2022 at 8:21 AM Yonghong Song <yhs@xxxxxx> wrote:

A new bpf_type_flag MEM_RCU is added to indicate a PTR_TO_BTF_ID
object access needing rcu_read_lock protection. The rcu protection
is not needed for non-sleepable program. So various verification
checking is only done for sleepable programs. In particular, only
the following insns can be inside bpf_rcu_read_lock() region:
   - any non call insns except BPF_ABS/BPF_IND
   - non sleepable helpers and kfuncs.
Also, bpf_*_storage_get() helper's 5th hidden argument (for memory
allocation flag) should be GFP_ATOMIC.

If a pointer (PTR_TO_BTF_ID) is marked as rcu, then any use of
this pointer and the load which gets this pointer needs to be
protected by bpf_rcu_read_lock(). The following shows a couple
of examples:
   struct task_struct {
         ...
         struct task_struct __rcu        *real_parent;
         struct css_set __rcu            *cgroups;
         ...
   };
   struct css_set {
         ...
         struct cgroup *dfl_cgrp;
         ...
   }
   ...
   task = bpf_get_current_task_btf();
   cgroups = task->cgroups;
   dfl_cgroup = cgroups->dfl_cgrp;
   ... using dfl_cgroup ...

The bpf_rcu_read_lock/unlock() should be added like below to
avoid verification failures.
   task = bpf_get_current_task_btf();
   bpf_rcu_read_lock();
   cgroups = task->cgroups;
   dfl_cgroup = cgroups->dfl_cgrp;
   bpf_rcu_read_unlock();
   ... using dfl_cgroup ...

The following is another example for task->real_parent.
   task = bpf_get_current_task_btf();
   bpf_rcu_read_lock();
   real_parent = task->real_parent;
   ... bpf_task_storage_get(&map, real_parent, 0, 0);
   bpf_rcu_read_unlock();

There is another case observed in selftest bpf_iter_ipv6_route.c:
   struct fib6_info *rt = ctx->rt;
   ...
   fib6_nh = &rt->fib6_nh[0]; // Not rcu protected
   ...
   if (rt->nh)
     fib6_nh = &nh->nh_info->fib6_nh; // rcu protected
   ...
   ... using fib6_nh ...
Currently verification will fail with
   same insn cannot be used with different pointers
since the use of fib6_nh is tag with rcu in one path
but not in the other path. The above use case is a valid
one so the verifier is changed to ignore MEM_RCU type tag
in such cases.

Signed-off-by: Yonghong Song <yhs@xxxxxx>
---
  include/linux/bpf.h          |   3 +
  include/linux/bpf_verifier.h |   1 +
  kernel/bpf/btf.c             |  11 +++
  kernel/bpf/verifier.c        | 126 ++++++++++++++++++++++++++++++++---
  4 files changed, 133 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a9bda4c91fc7..f0d973c8d227 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -458,6 +458,9 @@ enum bpf_type_flag {
         /* Size is known at compile time. */
         MEM_FIXED_SIZE          = BIT(10 + BPF_BASE_TYPE_BITS),

+       /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
+       MEM_RCU                 = BIT(11 + BPF_BASE_TYPE_BITS),
+
         __BPF_TYPE_FLAG_MAX,
         __BPF_TYPE_LAST_FLAG    = __BPF_TYPE_FLAG_MAX - 1,
  };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1a32baa78ce2..d4e56f5a4b20 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -324,6 +324,7 @@ struct bpf_verifier_state {
         u32 insn_idx;
         u32 curframe;
         u32 active_spin_lock;
+       u32 active_rcu_lock;
         bool speculative;

         /* first and last insn idx of this verifier state */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 35c07afac924..293d224a7f53 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5527,6 +5527,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                         info->reg_type |= MEM_USER;
                 if (strcmp(tag_value, "percpu") == 0)
                         info->reg_type |= MEM_PERCPU;
+               if (strcmp(tag_value, "rcu") == 0)
+                       info->reg_type |= MEM_RCU;
         }

         /* skip modifiers */
@@ -5765,6 +5767,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
                                 /* check __percpu tag */
                                 if (strcmp(tag_value, "percpu") == 0)
                                         tmp_flag = MEM_PERCPU;
+                               /* check __rcu tag */
+                               if (strcmp(tag_value, "rcu") == 0)
+                                       tmp_flag = MEM_RCU;
                         }

                         stype = btf_type_skip_modifiers(btf, mtype->type, &id);
@@ -6560,6 +6565,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                 return -EINVAL;
         }

+       if (sleepable && env->cur_state->active_rcu_lock) {
+               bpf_log(log, "kernel function %s is sleepable within rcu_read_lock region\n",
+                       func_name);
+               return -EINVAL;
+       }
+
         if (kfunc_meta && ref_obj_id)
                 kfunc_meta->ref_obj_id = ref_obj_id;

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 82c07fe0bfb1..3c5afd3bc216 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -188,6 +188,9 @@ struct bpf_verifier_stack_elem {
                                           POISON_POINTER_DELTA))
  #define BPF_MAP_PTR(X)         ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

+/* Using insn->off = BPF_STORAGE_GET_CALL to mark bpf_*_storage_get() helper calls. */
+#define BPF_STORAGE_GET_CALL   1
+
  static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
  static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);

@@ -511,6 +514,22 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
         return func_id == BPF_FUNC_dynptr_data;
  }

+static bool is_storage_get_function(enum bpf_func_id func_id)
+{
+       return func_id == BPF_FUNC_sk_storage_get ||
+              func_id == BPF_FUNC_inode_storage_get ||
+              func_id == BPF_FUNC_task_storage_get ||
+              func_id == BPF_FUNC_cgrp_storage_get;
+}
+
+static bool is_sleepable_function(enum bpf_func_id func_id)
+{
+       return func_id == BPF_FUNC_copy_from_user ||
+              func_id == BPF_FUNC_copy_from_user_task ||
+              func_id == BPF_FUNC_ima_inode_hash ||
+              func_id == BPF_FUNC_ima_file_hash;
+}

This is a bit concerning that these are in two different places in the kernel.
We expose a helper based on prog->aux->sleepable in different places and
it's worth doing some refactoring to keep all this logic in a single place.

Okay, let me do a restructing to have a central place to check
helpers for sleepable program only.


+
  static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
                                         const struct bpf_map *map)
  {
@@ -583,6 +602,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
                 strncpy(prefix, "percpu_", 32);
         if (type & PTR_UNTRUSTED)
                 strncpy(prefix, "untrusted_", 32);
+       if (type & MEM_RCU)
+               strncpy(prefix, "rcu_", 32);
[...]

-               if (insn->imm == BPF_FUNC_task_storage_get ||
-                   insn->imm == BPF_FUNC_sk_storage_get ||
-                   insn->imm == BPF_FUNC_inode_storage_get ||
-                   insn->imm == BPF_FUNC_cgrp_storage_get) {
-                       if (env->prog->aux->sleepable)
+               if (is_storage_get_function(insn->imm)) {
+                       if (env->prog->aux->sleepable && insn->off) {

I would recommend explicitly checking if insn->off == BPF_STORAGE_GET_CALL.

Also, your comment says you are marking BPF_STORAGE_GET_CALL but this is only
set when the call is in a classical RCU critical section and in a
sleepable program. The
The name and the comment above should reflect that.

BPF_STORAGE_GET_CALL_ATOMIC or something.

BPF_STORAGE_GET_CALL_ATOMIC sounds okay. Will use it unless I can come
up with a better name.



+                               insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
+                               insn->off = 0;
+                       } else if (env->prog->aux->sleepable)
                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
                         else
                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
--
2.30.2




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux