Hello.
在 2023/9/12 15:01, Chuyi Zhou 写道:
css_iter and process_iter should be used in rcu section. Specifically, in
sleepable progs an explicit bpf_rcu_read_lock() is needed before using these
iters. In normal bpf progs that have implicit rcu_read_lock(), it's OK to
use them directly.
This patch checks whether we are in rcu cs before we want to invoke
bpf_iter_process_new and bpf_iter_css_{pre, post}_new in
mark_stack_slots_iter(). If the rcu protection is guaranteed, we would
let st->type = PTR_TO_STACK | MEM_RCU. is_iter_reg_valid_init() will
reject if reg->type is UNTRUSTED.
I use the following BPF Prog to test this patch:
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int iter_task_for_each_sleep(void *ctx)
{
struct task_struct *task;
struct task_struct *cur_task = bpf_get_current_task_btf();
if (cur_task->pid != target_pid)
return 0;
bpf_rcu_read_lock();
bpf_for_each(process, task) {
bpf_rcu_read_unlock();
if (task->pid == target_pid)
process_cnt += 1;
bpf_rcu_read_lock();
}
bpf_rcu_read_unlock();
return 0;
}
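Unfortunately, this program passes the verifier, even though it calls bpf_rcu_read_unlock() in the middle of the iteration and should be rejected.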
Then I add some printk-messages before setting/clearing state to help debug:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d151e6b43a5f..35f3fa9471a9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1200,7 +1200,7 @@ static int mark_stack_slots_iter(struct
bpf_verifier_env *env,
__mark_reg_known_zero(st);
st->type = PTR_TO_STACK; /* we don't have dedicated reg
type */
if (is_iter_need_rcu(meta)) {
+ printk("mark reg_addr : %px", st);
if (in_rcu_cs(env))
st->type |= MEM_RCU;
else
@@ -11472,8 +11472,8 @@ static int check_kfunc_call(struct
bpf_verifier_env *env, struct bpf_insn *insn,
return -EINVAL;
} else if (rcu_unlock) {
bpf_for_each_reg_in_vstate(env->cur_state,
state, reg, ({
+ printk("clear reg_addr : %px MEM_RCU :
%d PTR_UNTRUSTED : %d\n ", reg, reg->type & MEM_RCU, reg->type &
PTR_UNTRUSTED);
if (reg->type & MEM_RCU) {
- printk("clear reg addr : %lld",
reg);
reg->type &= ~(MEM_RCU |
PTR_MAYBE_NULL);
reg->type |= PTR_UNTRUSTED;
}
The dmesg log:
[ 393.705324] mark reg_addr : ffff88814e40e200
[ 393.706883] clear reg_addr : ffff88814d5f8000 MEM_RCU : 0
PTR_UNTRUSTED : 0
[ 393.707353] clear reg_addr : ffff88814d5f8078 MEM_RCU : 0
PTR_UNTRUSTED : 0
[ 393.708099] clear reg_addr : ffff88814d5f80f0 MEM_RCU : 0
PTR_UNTRUSTED : 0
....
....
I didn't see ffff88814e40e200 being cleared as expected, because
bpf_for_each_reg_in_vstate didn't find it.
It seems that when we call bpf_rcu_read_unlock() in the middle of the
iteration and want to clear the state through bpf_for_each_reg_in_vstate,
we cannot find the reg which we previously marked MEM_RCU/PTR_UNTRUSTED in
mark_stack_slots_iter().
I thought maybe the correct answer here is to operate on the *iter_reg*
parameter in mark_stack_slots_iter() directly, so that we can find it in
bpf_for_each_reg_in_vstate.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6a6827ba7a18..53330ddf2b3c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1218,6 +1218,12 @@ static int mark_stack_slots_iter(struct
bpf_verifier_env *env,
mark_stack_slot_scratched(env, spi - i);
}
+ if (is_iter_need_rcu(meta)) {
+ if (in_rcu_cs(env))
+ reg->type |= MEM_RCU;
+ else
+ reg->type |= PTR_UNTRUSTED;
+ }
return 0;
}
@@ -1307,7 +1315,8 @@ static bool is_iter_reg_valid_init(struct
bpf_verifier_env *env, struct bpf_reg_
if (slot->slot_type[j] != STACK_ITER)
return false;
}
-
+ if (reg->type & PTR_UNTRUSTED)
+ return false;
return true;
}
However, it did not work either. The reason is that the state of iter_reg
is cleared implicitly before is_iter_reg_valid_init() runs, even if we
don't call bpf_rcu_read_unlock().
It would be appreciated if you could give some suggestions. Maybe it is
worth trying the solution proposed by Kumar?[1]
[1]
https://lore.kernel.org/lkml/CAP01T77cWxWNwq5HLr+Woiu7k4-P3QQfJWX1OeQJUkxW3=P4bA@xxxxxxxxxxxxxx/#t