Re: [PATCH bpf-next] bpf: Handle MEM_RCU type properly

Yonghong Song <yhs@xxxxxxxx> · Thu, 1 Dec 2022 09:47:22 -0800

On 11/30/22 11:05 PM, Martin KaFai Lau wrote:
On 11/28/22 6:37 PM, Yonghong Song wrote:

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c05aa6e1f6f5..6f192dd9025e 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -683,7 +683,7 @@ static inline bool bpf_prog_check_recur(const 
struct bpf_prog *prog)
      }
  }
-#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
+#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
[ ... ]

+static bool is_rcu_reg(const struct bpf_reg_state *reg)
+{
+    return reg->type & MEM_RCU;
+}
+
  static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
                     const struct bpf_reg_state *reg,
                     int off, int size, bool strict)
@@ -4775,12 +4780,10 @@ static int check_ptr_to_btf_access(struct 
bpf_verifier_env *env,
          /* Mark value register as MEM_RCU only if it is protected by
           * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
           * itself can already indicate trustedness inside the rcu
-         * read lock region. Also mark it as PTR_TRUSTED.
+         * read lock region.
           */
          if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
              flag &= ~MEM_RCU;
How about dereferencing a PTR_TO_BTF_ID | MEM_RCU, like:

     /* parent: PTR_TO_BTF_ID | MEM_RCU */
     parent = current->real_parent;
     /* gparent: PTR_TO_BTF_ID */
     gparent = parent->real_parent;

Should "gparent" have MEM_RCU also?
Currently, no. We have logic in the code like

        if (flag & MEM_RCU) {
                /* Mark value register as MEM_RCU only if it is 
protected by
                 * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
                 * itself can already indicate trustedness inside the rcu
                 * read lock region.
                 */
                if (!env->cur_state->active_rcu_lock || 
!is_trusted_reg(reg))
                        flag &= ~MEM_RCU;
        }

Since 'parent' is not trusted, so it will not be marked as MEM_RCU.
It can be marked as MEM_RCU if we do (based on the current patch)

	parent = current->real_parent;
	parent2 = bpf_task_acquire_rcu(parent);
	if (!parent2) goto out;
	gparent = parent2->real_parent;

Now gparent will be marked as MEM_RCU.

Also, should PTR_MAYBE_NULL be added to "parent"?
I think we might need to do this. Although from kernel code,
task->real_parent, current->cgroups seem not NULL. But for sure
there are cases where the rcu ptr could be NULL. This might
be conservative for some cases, and if it is absolutely
performance critical, we later could tag related __rcu member
with btf_decl_tag to indicate its non-null status.

-        else
-            flag |= PTR_TRUSTED;
      } else if (reg->type & MEM_RCU) {
          /* ptr (reg) is marked as MEM_RCU, but the struct field is 
not tagged
           * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
@@ -5945,7 +5948,7 @@ static const struct bpf_reg_types btf_ptr_types = {
      .types = {
          PTR_TO_BTF_ID,
          PTR_TO_BTF_ID | PTR_TRUSTED,
-        PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
+        PTR_TO_BTF_ID | MEM_RCU,
      },
  };
  static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -6124,7 +6127,7 @@ int check_func_arg_reg_off(struct 
bpf_verifier_env *env,
      case PTR_TO_BTF_ID:
      case PTR_TO_BTF_ID | MEM_ALLOC:
      case PTR_TO_BTF_ID | PTR_TRUSTED:
-    case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
+    case PTR_TO_BTF_ID | MEM_RCU:
      case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
          /* When referenced PTR_TO_BTF_ID is passed to release function,
           * it's fixed offset must be 0.    In the other cases, fixed 
offset
@@ -8022,6 +8025,11 @@ static bool is_kfunc_destructive(struct 
bpf_kfunc_call_arg_meta *meta)
      return meta->kfunc_flags & KF_DESTRUCTIVE;
  }
+static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
+{
+    return meta->kfunc_flags & KF_RCU;
+}
+
  static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta 
*meta, int arg)
  {
      return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
@@ -8706,13 +8714,19 @@ static int check_kfunc_args(struct 
bpf_verifier_env *env, struct bpf_kfunc_call_
          switch (kf_arg_type) {
          case KF_ARG_PTR_TO_ALLOC_BTF_ID:
          case KF_ARG_PTR_TO_BTF_ID:
-            if (!is_kfunc_trusted_args(meta))
+            if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
                  break;
-            if (!is_trusted_reg(reg)) {
-                verbose(env, "R%d must be referenced or trusted\n", 
regno);
+            if (!is_trusted_reg(reg) && !is_rcu_reg(reg)) {
+                verbose(env, "R%d must be referenced, trusted or 
rcu\n", regno);
                  return -EINVAL;
              }
+
+            if (is_kfunc_rcu(meta) != is_rcu_reg(reg)) {
I think is_trusted_reg(reg) should also be acceptable to 
bpf_task_acquire_rcu().
Yes, good point. trusted is a super set of rcu.

nit. bpf_task_acquire_not_zero() may be a better kfunc name.
Will use this one.

+                verbose(env, "R%d does not match kf arg rcu 
tagging\n", regno);
+                return -EINVAL;
+            }
+
              fallthrough;
          case KF_ARG_PTR_TO_CTX:
              /* Trusted arguments have the same offset checks as 
release arguments */
@@ -8823,7 +8837,7 @@ static int check_kfunc_args(struct 
bpf_verifier_env *env, struct bpf_kfunc_call_
          case KF_ARG_PTR_TO_BTF_ID:
              /* Only base_type is checked, further checks are done 
here */
              if ((base_type(reg->type) != PTR_TO_BTF_ID ||
-                 bpf_type_has_unsafe_modifiers(reg->type)) &&
+                 (bpf_type_has_unsafe_modifiers(reg->type) && 
!is_rcu_reg(reg))) &&
                  !reg2btf_ids[base_type(reg->type)]) {
                  verbose(env, "arg#%d is %s ", i, reg_type_str(env, 
reg->type));
                  verbose(env, "expected %s or socket\n",
@@ -8938,7 +8952,7 @@ static int check_kfunc_call(struct 
bpf_verifier_env *env, struct bpf_insn *insn,
          } else if (rcu_unlock) {
              bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
                  if (reg->type & MEM_RCU) {
-                    reg->type &= ~(MEM_RCU | PTR_TRUSTED);
+                    reg->type &= ~MEM_RCU;
                      reg->type |= PTR_UNTRUSTED;
                  }
              }));
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c 
b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index 973f0c5af965..5fbd9edd2c4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -93,10 +93,10 @@ static struct {
      const char *prog_name;
      const char *expected_err_msg;
  } failure_tests[] = {
-    {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or 
trusted"},
+    {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced, trusted 
or rcu"},
      {"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must 
point"},
      {"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported 
for arg#0 function"},
-    {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or 
trusted"},
+    {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced, 
trusted or rcu"},
      {"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup 
must point"},
      {"cgrp_kfunc_acquire_unreleased", "Unreleased reference"},
      {"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map 
value"},
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c 
b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
index ffd8ef4303c8..80708c073de6 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -87,10 +87,10 @@ static struct {
      const char *prog_name;
      const char *expected_err_msg;
  } failure_tests[] = {
-    {"task_kfunc_acquire_untrusted", "R1 must be referenced or 
trusted"},
+    {"task_kfunc_acquire_untrusted", "R1 must be referenced, trusted 
or rcu"},
      {"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct 
must point"},
      {"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported 
for arg#0 function"},
-    {"task_kfunc_acquire_trusted_walked", "R1 must be referenced or 
trusted"},
+    {"task_kfunc_acquire_trusted_walked", "R1 must be referenced, 
trusted or rcu"},
hmm... why this description is changed here?  The bpf_task_acquire 
kfunc-flags has not changed.
This is due to my code change below.

-                       if (!is_trusted_reg(reg)) {
-                               verbose(env, "R%d must be referenced or 
trusted\n", regno);
+                       if (!is_trusted_reg(reg) && !is_rcu_reg(reg)) {
+                               verbose(env, "R%d must be referenced, 
trusted or rcu\n", regno);
                                return -EINVAL;
                        }

I will change code differently to avoid verifier log change.