On Sat, Feb 3, 2024 at 6:03 AM Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> wrote: > > On Wed, Jan 31, 2024 at 6:55 AM Yafang Shao <laoar.shao@xxxxxxxxx> wrote: > > > > Add selftests for the newly added cpumask iter. > > - cpumask_iter_success > > - The number of CPUs should be expected when iterating over the cpumask > > - percpu data extracted from the percpu struct should be expected > > - It can work in both non-sleepable and sleepable prog > > - RCU lock is only required by bpf_iter_cpumask_new() > > - It is fine without calling bpf_iter_cpumask_next() > > > > - cpumask_iter_failure > > - RCU lock is required in sleepable prog > > - The cpumask to be iterated over can't be NULL > > - bpf_iter_cpumask_destroy() is required after calling > > bpf_iter_cpumask_new() > > - bpf_iter_cpumask_destroy() can only destroy an initilialized iter > > - bpf_iter_cpumask_next() must use an initilialized iter > > typos: initialized will fix it. > > > > > The result as follows, > > > > #64/37 cpumask/test_cpumask_iter:OK > > #64/38 cpumask/test_cpumask_iter_sleepable:OK > > #64/39 cpumask/test_cpumask_iter_sleepable:OK > > #64/40 cpumask/test_cpumask_iter_next_no_rcu:OK > > #64/41 cpumask/test_cpumask_iter_no_next:OK > > #64/42 cpumask/test_cpumask_iter:OK > > #64/43 cpumask/test_cpumask_iter_no_rcu:OK > > #64/44 cpumask/test_cpumask_iter_no_destroy:OK > > #64/45 cpumask/test_cpumask_iter_null_pointer:OK > > #64/46 cpumask/test_cpumask_iter_next_uninit:OK > > #64/47 cpumask/test_cpumask_iter_destroy_uninit:OK > > #64 cpumask:OK > > > > Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> > > --- > > tools/testing/selftests/bpf/config | 1 + > > .../selftests/bpf/prog_tests/cpumask.c | 152 ++++++++++++++++++ > > .../selftests/bpf/progs/cpumask_common.h | 3 + > > .../bpf/progs/cpumask_iter_failure.c | 99 ++++++++++++ > > .../bpf/progs/cpumask_iter_success.c | 126 +++++++++++++++ > > 5 files changed, 381 insertions(+) > > create mode 100644 
tools/testing/selftests/bpf/progs/cpumask_iter_failure.c > > create mode 100644 tools/testing/selftests/bpf/progs/cpumask_iter_success.c > > > > LGTM overall, except for seemingly unnecessary use of a big macro > > > diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h > > index 0cd4aebb97cf..cdb9dc95e9d9 100644 > > --- a/tools/testing/selftests/bpf/progs/cpumask_common.h > > +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h > > @@ -55,6 +55,9 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym > > u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; > > u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; > > u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; > > +int bpf_iter_cpumask_new(struct bpf_iter_cpumask *it, const struct cpumask *mask) __ksym; > > +int *bpf_iter_cpumask_next(struct bpf_iter_cpumask *it) __ksym; > > +void bpf_iter_cpumask_destroy(struct bpf_iter_cpumask *it) __ksym; > > let's mark them __weak so they don't conflict with definitions that > will eventually come from vmlinux.h (that applies to all the kfunc > definitions we currently have and we'll need to clean all that up, but > let's not add non-weak kfuncs going forward) will change it. > > > > > void bpf_rcu_read_lock(void) __ksym; > > void bpf_rcu_read_unlock(void) __ksym; > > [...] 
> > > diff --git a/tools/testing/selftests/bpf/progs/cpumask_iter_success.c b/tools/testing/selftests/bpf/progs/cpumask_iter_success.c > > new file mode 100644 > > index 000000000000..4ce14ef98451 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/progs/cpumask_iter_success.c > > @@ -0,0 +1,126 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* Copyright (c) 2024 Yafang Shao <laoar.shao@xxxxxxxxx> */ > > + > > +#include "vmlinux.h" > > +#include <bpf/bpf_helpers.h> > > +#include <bpf/bpf_tracing.h> > > + > > +#include "task_kfunc_common.h" > > +#include "cpumask_common.h" > > + > > +char _license[] SEC("license") = "GPL"; > > + > > +extern const struct psi_group_cpu system_group_pcpu __ksym __weak; > > +extern const struct rq runqueues __ksym __weak; > > + > > +int pid; > > + > > +#define READ_PERCPU_DATA(meta, cgrp, mask) \ > > +{ \ > > + u32 nr_running = 0, psi_nr_running = 0, nr_cpus = 0; \ > > + struct psi_group_cpu *groupc; \ > > + struct rq *rq; \ > > + int *cpu; \ > > + \ > > + bpf_for_each(cpumask, cpu, mask) { \ > > + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, *cpu); \ > > + if (!rq) { \ > > + err += 1; \ > > + continue; \ > > + } \ > > + nr_running += rq->nr_running; \ > > + nr_cpus += 1; \ > > + \ > > + groupc = (struct psi_group_cpu *)bpf_per_cpu_ptr(&system_group_pcpu, *cpu); \ > > + if (!groupc) { \ > > + err += 1; \ > > + continue; \ > > + } \ > > + psi_nr_running += groupc->tasks[NR_RUNNING]; \ > > + } \ > > + BPF_SEQ_PRINTF(meta->seq, "nr_running %u nr_cpus %u psi_running %u\n", \ > > + nr_running, nr_cpus, psi_nr_running); \ > > +} > > + > > Does this have to be a gigantic macro? Why can't it be just a function? It seems that the verifier can't identify a function call between bpf_rcu_read_lock() and bpf_rcu_read_unlock(). In other words, if there's a function call between them, the verifier will fail. 
Below is the full verifier log if I define it as : static inline void read_percpu_data(struct bpf_iter_meta *meta, struct cgroup *cgrp, const cpumask_t *mask) VERIFIER LOG: ============= 0: R1=ctx() R10=fp0 ; int BPF_PROG(test_cpumask_iter_sleepable, struct bpf_iter_meta *meta, struct cgroup *cgrp) 0: (b4) w7 = 0 ; R7_w=0 ; int BPF_PROG(test_cpumask_iter_sleepable, struct bpf_iter_meta *meta, struct cgroup *cgrp) 1: (79) r2 = *(u64 *)(r1 +8) ; R1=ctx() R2_w=trusted_ptr_or_null_cgroup(id=1) ; if (!cgrp) 2: (15) if r2 == 0x0 goto pc+16 ; R2_w=trusted_ptr_cgroup() ; int BPF_PROG(test_cpumask_iter_sleepable, struct bpf_iter_meta *meta, struct cgroup *cgrp) 3: (79) r6 = *(u64 *)(r1 +0) func 'bpf_iter_cgroup' arg0 has btf_id 10966 type STRUCT 'bpf_iter_meta' 4: R1=ctx() R6_w=trusted_ptr_bpf_iter_meta() ; bpf_rcu_read_lock(); 4: (85) call bpf_rcu_read_lock#84184 ; ; p = bpf_task_from_pid(pid); 5: (18) r1 = 0xffffbc1ad3f72004 ; R1_w=map_value(map=cpumask_.bss,ks=4,vs=8,off=4) 7: (61) r1 = *(u32 *)(r1 +0) ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) ; p = bpf_task_from_pid(pid); 8: (85) call bpf_task_from_pid#84204 ; R0=ptr_or_null_task_struct(id=3,ref_obj_id=3) refs=3 9: (bf) r8 = r0 ; R0=ptr_or_null_task_struct(id=3,ref_obj_id=3) R8_w=ptr_or_null_task_struct(id=3,ref_obj_id=3) refs=3 10: (b4) w7 = 1 ; R7_w=1 refs=3 ; if (!p) { 11: (15) if r8 == 0x0 goto pc+6 ; R8_w=ptr_task_struct(ref_obj_id=3) refs=3 ; read_percpu_data(meta, cgrp, p->cpus_ptr); 12: (79) r2 = *(u64 *)(r8 +984) ; R2_w=rcu_ptr_cpumask() R8_w=ptr_task_struct(ref_obj_id=3) refs=3 ; read_percpu_data(meta, cgrp, p->cpus_ptr); 13: (bf) r1 = r6 ; R1_w=trusted_ptr_bpf_iter_meta() R6=trusted_ptr_bpf_iter_meta() refs=3 14: (85) call pc+6 caller: R6=trusted_ptr_bpf_iter_meta() R7_w=1 R8_w=ptr_task_struct(ref_obj_id=3) R10=fp0 refs=3 callee: frame1: R1_w=trusted_ptr_bpf_iter_meta() R2_w=rcu_ptr_cpumask() R10=fp0 refs=3 21: frame1: R1_w=trusted_ptr_bpf_iter_meta() R2_w=rcu_ptr_cpumask() R10=fp0 
refs=3 ; static inline void read_percpu_data(struct bpf_iter_meta *meta, struct cgroup *cgrp, const cpumask_t *mask) 21: (bf) r8 = r1 ; frame1: R1_w=trusted_ptr_bpf_iter_meta() R8_w=trusted_ptr_bpf_iter_meta() refs=3 22: (bf) r7 = r10 ; frame1: R7_w=fp0 R10=fp0 refs=3 ; 23: (07) r7 += -24 ; frame1: R7_w=fp-24 refs=3 ; bpf_for_each(cpumask, cpu, mask) { 24: (bf) r1 = r7 ; frame1: R1_w=fp-24 R7_w=fp-24 refs=3 25: (85) call bpf_iter_cpumask_new#77163 ; frame1: R0=scalar() fp-24=iter_cpumask(ref_id=4,state=active,depth=0) refs=3,4 ; bpf_for_each(cpumask, cpu, mask) { 26: (bf) r1 = r7 ; frame1: R1=fp-24 R7=fp-24 refs=3,4 27: (85) call bpf_iter_cpumask_next#77165 ; frame1: R0_w=0 fp-24=iter_cpumask(ref_id=4,state=drained,depth=0) refs=3,4 28: (bf) r7 = r0 ; frame1: R0_w=0 R7_w=0 refs=3,4 29: (b4) w1 = 0 ; frame1: R1_w=0 refs=3,4 30: (63) *(u32 *)(r10 -40) = r1 ; frame1: R1_w=0 R10=fp0 fp-40=????0 refs=3,4 31: (b4) w1 = 0 ; frame1: R1_w=0 refs=3,4 32: (7b) *(u64 *)(r10 -32) = r1 ; frame1: R1_w=0 R10=fp0 fp-32_w=0 refs=3,4 33: (b4) w9 = 0 ; frame1: R9_w=0 refs=3,4 ; bpf_for_each(cpumask, cpu, mask) { 34: (15) if r7 == 0x0 goto pc+57 ; frame1: R7_w=0 refs=3,4 ; bpf_for_each(cpumask, cpu, mask) { 92: (bf) r1 = r10 ; frame1: R1_w=fp0 R10=fp0 refs=3,4 ; bpf_for_each(cpumask, cpu, mask) { 93: (07) r1 += -24 ; frame1: R1_w=fp-24 refs=3,4 94: (85) call bpf_iter_cpumask_destroy#77161 ; frame1: refs=3 ; BPF_SEQ_PRINTF(meta->seq, "nr_running %u nr_cpus %u psi_running %u\n", 95: (61) r1 = *(u32 *)(r10 -40) ; frame1: R1_w=0 R10=fp0 fp-40=????0 refs=3 96: (bc) w1 = w1 ; frame1: R1_w=0 refs=3 97: (7b) *(u64 *)(r10 -8) = r1 ; frame1: R1_w=0 R10=fp0 fp-8_w=0 refs=3 98: (79) r1 = *(u64 *)(r10 -32) ; frame1: R1_w=0 R10=fp0 fp-32=0 refs=3 99: (7b) *(u64 *)(r10 -16) = r1 ; frame1: R1_w=0 R10=fp0 fp-16_w=0 refs=3 100: (7b) *(u64 *)(r10 -24) = r9 ; frame1: R9=0 R10=fp0 fp-24_w=0 refs=3 101: (79) r1 = *(u64 *)(r8 +0) ; frame1: R1_w=trusted_ptr_seq_file() R8=trusted_ptr_bpf_iter_meta() refs=3 
102: (bf) r4 = r10 ; frame1: R4_w=fp0 R10=fp0 refs=3 ; bpf_for_each(cpumask, cpu, mask) { 103: (07) r4 += -24 ; frame1: R4_w=fp-24 refs=3 ; BPF_SEQ_PRINTF(meta->seq, "nr_running %u nr_cpus %u psi_running %u\n", 104: (18) r2 = 0xffff9bce47e0e210 ; frame1: R2_w=map_value(map=cpumask_.rodata,ks=4,vs=41) refs=3 106: (b4) w3 = 41 ; frame1: R3_w=41 refs=3 107: (b4) w5 = 24 ; frame1: R5_w=24 refs=3 108: (85) call bpf_seq_printf#126 ; frame1: R0=scalar() refs=3 ; } 109: (95) exit bpf_rcu_read_unlock is missing processed 45 insns (limit 1000000) max_states_per_insn 0 total_states 5 peak_states 5 mark_read 3 ============= Another workaround is using the __always_inline : static __always_inline void read_percpu_data(struct bpf_iter_meta *meta, struct cgroup *cgrp, const cpumask_t *mask) > > > +SEC("iter.s/cgroup") > > +int BPF_PROG(test_cpumask_iter_sleepable, struct bpf_iter_meta *meta, struct cgroup *cgrp) > > +{ > > + struct task_struct *p; > > + > > + /* epilogue */ > > + if (!cgrp) > > + return 0; > > + > > + bpf_rcu_read_lock(); > > + p = bpf_task_from_pid(pid); > > + if (!p) { > > + bpf_rcu_read_unlock(); > > + return 1; > > + } > > + > > + READ_PERCPU_DATA(meta, cgrp, p->cpus_ptr); > > + bpf_task_release(p); > > + bpf_rcu_read_unlock(); > > + return 0; > > +} > > + > > [...] -- Regards Yafang