A new bpf_type_flag MEM_RCU is added to indicate a PTR_TO_BTF_ID
object access needing rcu_read_lock protection. The rcu protection
is not needed for non-sleepable programs, so the various verification
checks are only done for sleepable programs. In particular, only
the following insns can be inside bpf_rcu_read_lock() region:
- any non-call insns except BPF_ABS/BPF_IND
- non-sleepable helpers and kfuncs.
Also, bpf_*_storage_get() helper's 5th hidden argument (for memory
allocation flag) should be GFP_ATOMIC.
If a pointer (PTR_TO_BTF_ID) is marked as rcu, then any use of
this pointer and the load which gets this pointer needs to be
protected by bpf_rcu_read_lock(). The following shows a couple
of examples:
struct task_struct {
...
struct task_struct __rcu *real_parent;
struct css_set __rcu *cgroups;
...
};
struct css_set {
...
struct cgroup *dfl_cgrp;
...
};
...
task = bpf_get_current_task_btf();
cgroups = task->cgroups;
dfl_cgroup = cgroups->dfl_cgrp;
... using dfl_cgroup ...
The bpf_rcu_read_lock/unlock() should be added like below to
avoid verification failures.
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
cgroups = task->cgroups;
dfl_cgroup = cgroups->dfl_cgrp;
bpf_rcu_read_unlock();
... using dfl_cgroup ...
The following is another example for task->real_parent.
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
real_parent = task->real_parent;
... bpf_task_storage_get(&map, real_parent, 0, 0);
bpf_rcu_read_unlock();
There is another case observed in selftest bpf_iter_ipv6_route.c:
struct fib6_info *rt = ctx->rt;
...
fib6_nh = &rt->fib6_nh[0]; // Not rcu protected
...
if (rt->nh)
fib6_nh = &nh->nh_info->fib6_nh; // rcu protected
...
... using fib6_nh ...
Currently verification will fail with the error
"same insn cannot be used with different pointers"
since the use of fib6_nh is tagged with rcu in one path
but not in the other path. The above use case is a valid
one so the verifier is changed to ignore MEM_RCU type tag
in such cases.
Signed-off-by: Yonghong Song <yhs@xxxxxx>
---
include/linux/bpf.h | 3 +
include/linux/bpf_verifier.h | 1 +
kernel/bpf/btf.c | 11 +++
kernel/bpf/verifier.c | 126 ++++++++++++++++++++++++++++++++---
4 files changed, 133 insertions(+), 8 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a9bda4c91fc7..f0d973c8d227 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -458,6 +458,9 @@ enum bpf_type_flag {
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
+ /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
+ MEM_RCU = BIT(11 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1a32baa78ce2..d4e56f5a4b20 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -324,6 +324,7 @@ struct bpf_verifier_state {
u32 insn_idx;
u32 curframe;
u32 active_spin_lock;
+ u32 active_rcu_lock;
bool speculative;
/* first and last insn idx of this verifier state */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 35c07afac924..293d224a7f53 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5527,6 +5527,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->reg_type |= MEM_USER;
if (strcmp(tag_value, "percpu") == 0)
info->reg_type |= MEM_PERCPU;
+ if (strcmp(tag_value, "rcu") == 0)
+ info->reg_type |= MEM_RCU;
}
/* skip modifiers */
@@ -5765,6 +5767,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
/* check __percpu tag */
if (strcmp(tag_value, "percpu") == 0)
tmp_flag = MEM_PERCPU;
+ /* check __rcu tag */
+ if (strcmp(tag_value, "rcu") == 0)
+ tmp_flag = MEM_RCU;
}
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
@@ -6560,6 +6565,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
+ if (sleepable && env->cur_state->active_rcu_lock) {
+ bpf_log(log, "kernel function %s is sleepable within rcu_read_lock region\n",
+ func_name);
+ return -EINVAL;
+ }
+
if (kfunc_meta && ref_obj_id)
kfunc_meta->ref_obj_id = ref_obj_id;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 82c07fe0bfb1..3c5afd3bc216 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -188,6 +188,9 @@ struct bpf_verifier_stack_elem {
POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
+/* Using insn->off = BPF_STORAGE_GET_CALL to mark bpf_*_storage_get() helper calls. */
+#define BPF_STORAGE_GET_CALL 1
+
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
@@ -511,6 +514,22 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
return func_id == BPF_FUNC_dynptr_data;
}
+static bool is_storage_get_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_sk_storage_get ||
+ func_id == BPF_FUNC_inode_storage_get ||
+ func_id == BPF_FUNC_task_storage_get ||
+ func_id == BPF_FUNC_cgrp_storage_get;
+}
+
+static bool is_sleepable_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_copy_from_user ||
+ func_id == BPF_FUNC_copy_from_user_task ||
+ func_id == BPF_FUNC_ima_inode_hash ||
+ func_id == BPF_FUNC_ima_file_hash;
+}