On Fri, Jan 19, 2024 at 7:46 AM Yonghong Song <yonghong.song@xxxxxxxxx> wrote: > > > On 1/16/24 6:48 PM, Yafang Shao wrote: > > Within the BPF program, we leverage the cgroup iterator to iterate through > > percpu runqueue data, specifically the 'nr_running' metric. Subsequently > > we expose this data to userspace by means of a sequence file. > > > > The CPU affinity for the cpumask is determined by the PID of a task: > > > > - PID of the init task (PID 1) > > We typically don't set CPU affinity for init task and thus we can iterate > > across all possible CPUs. However, in scenarios where you've set CPU > > affinity for the init task, you should set the cpumask of your current > > task to full-F. Then proceed to iterate through all possible CPUs using > > What is full-F? It would be good if you can clarify in the commit message. I mean set all available CPUs for the task. Will clarify it in the next version. > > > the current task. > > - PID of a task with defined CPU affinity > > The aim here is to iterate through a specific cpumask. This scenario > > aligns with tasks residing within a cpuset cgroup. > > - Invalid PID (e.g., PID -1) > > No cpumask is available in this case. > > > > The result is as follows, > > #65/1 cpumask_iter/init_pid:OK > > #65/2 cpumask_iter/invalid_pid:OK > > #65/3 cpumask_iter/self_pid_one_cpu:OK > > #65/4 cpumask_iter/self_pid_multi_cpus:OK > > #65 cpumask_iter:OK > > Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED > > > > CONFIG_PSI=y is required for this testcase. 
> > > > Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> > > --- > > tools/testing/selftests/bpf/config | 1 + > > .../selftests/bpf/prog_tests/cpumask_iter.c | 134 ++++++++++++++++++ > > .../selftests/bpf/progs/cpumask_common.h | 3 + > > .../selftests/bpf/progs/test_cpumask_iter.c | 56 ++++++++ > > 4 files changed, 194 insertions(+) > > create mode 100644 tools/testing/selftests/bpf/prog_tests/cpumask_iter.c > > create mode 100644 tools/testing/selftests/bpf/progs/test_cpumask_iter.c > > > > diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config > > index c125c441abc7..9c42568ed376 100644 > > --- a/tools/testing/selftests/bpf/config > > +++ b/tools/testing/selftests/bpf/config > > @@ -78,6 +78,7 @@ CONFIG_NF_CONNTRACK_MARK=y > > CONFIG_NF_DEFRAG_IPV4=y > > CONFIG_NF_DEFRAG_IPV6=y > > CONFIG_NF_NAT=y > > +CONFIG_PSI=y > > CONFIG_RC_CORE=y > > CONFIG_SECURITY=y > > CONFIG_SECURITYFS=y > > diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c > > new file mode 100644 > > index 000000000000..984d01d09d79 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c > > @@ -0,0 +1,134 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* Copyright (c) 2024 Yafang Shao <laoar.shao@xxxxxxxxx> */ > > + > > +#define _GNU_SOURCE > > +#include <sched.h> > > +#include <stdio.h> > > +#include <unistd.h> > > + > > +#include <test_progs.h> > > +#include "cgroup_helpers.h" > > +#include "test_cpumask_iter.skel.h" > > + > > +static void verify_percpu_data(struct bpf_link *link, int nr_cpu_exp, int nr_running_exp) > > +{ > > + int iter_fd, len, item, nr_running, psi_running, nr_cpus; > > + static char buf[128]; > > why static? Will remove it. 
> > > + size_t left; > > + char *p; > > + > > + iter_fd = bpf_iter_create(bpf_link__fd(link)); > > + if (!ASSERT_GE(iter_fd, 0, "iter_fd")) > > + return; > > + > > + memset(buf, 0, sizeof(buf)); > > + left = ARRAY_SIZE(buf); > > + p = buf; > > + while ((len = read(iter_fd, p, left)) > 0) { > > + p += len; > > + left -= len; > > + } > > + > > + item = sscanf(buf, "nr_running %u nr_cpus %u psi_running %u\n", > > + &nr_running, &nr_cpus, &psi_running); > > + if (nr_cpu_exp == -1) { > > + ASSERT_EQ(item, -1, "seq_format"); > > + goto out; > > + } > > + > > + ASSERT_EQ(item, 3, "seq_format"); > > + ASSERT_GE(nr_running, nr_running_exp, "nr_running"); > > + ASSERT_GE(psi_running, nr_running_exp, "psi_running"); > > + ASSERT_EQ(nr_cpus, nr_cpu_exp, "nr_cpus"); > > + > > + /* read() after iter finishes should be ok. */ > > + if (len == 0) > > + ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read"); > > The above 'if' statement is irrelevant to the main purpose of this test > and can be removed. Will remove it. > > > + > > +out: > > + close(iter_fd); > > +} > > + > > +void test_cpumask_iter(void) > > +{ > > + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); > > + int nr_possible, cgrp_fd, pid, err, cnt, i; > > + struct test_cpumask_iter *skel = NULL; > > = NULL is not needed. Will change it. 
> > > + union bpf_iter_link_info linfo; > > + int cpu_ids[] = {1, 3, 4, 5}; > > + struct bpf_link *link; > > + cpu_set_t set; > > + > > + skel = test_cpumask_iter__open_and_load(); > > + if (!ASSERT_OK_PTR(skel, "test_for_each_cpu__open_and_load")) > > + return; > > + > > + if (setup_cgroup_environment()) > > + goto destroy; > > + > > + /* Utilize the cgroup iter */ > > + cgrp_fd = get_root_cgroup(); > > + if (!ASSERT_GE(cgrp_fd, 0, "create cgrp")) > > + goto cleanup; > > + > > + memset(&linfo, 0, sizeof(linfo)); > > + linfo.cgroup.cgroup_fd = cgrp_fd; > > + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; > > + opts.link_info = &linfo; > > + opts.link_info_len = sizeof(linfo); > > + > > + link = bpf_program__attach_iter(skel->progs.cpu_cgroup, &opts); > > + if (!ASSERT_OK_PTR(link, "attach_iter")) > > + goto close_fd; > > + > > + skel->bss->target_pid = 1; > > + /* In case init task is set CPU affinity */ > > + err = sched_getaffinity(1, sizeof(set), &set); > > + if (!ASSERT_OK(err, "setaffinity")) > > + goto close_fd; > > goto free_link. Nice catch. will change it. > > > + > > + cnt = CPU_COUNT(&set); > > + nr_possible = bpf_num_possible_cpus(); > > + if (test__start_subtest("init_pid")) > > + /* current task is running. */ > > + verify_percpu_data(link, cnt, cnt == nr_possible ? 1 : 0); > [...] -- Regards Yafang