On 1/16/24 6:48 PM, Yafang Shao wrote:
Within the BPF program, we leverage the cgroup iterator to iterate through
percpu runqueue data, specifically the 'nr_running' metric. Subsequently we
expose this data to userspace by means of a sequence file.

The CPU affinity for the cpumask is determined by the PID of a task:

- PID of the init task (PID 1)
  We typically don't set CPU affinity for init task and thus we can iterate
  across all possible CPUs. However, in scenarios where you've set CPU
  affinity for the init task, you should set the cpumask of your current
  task to full-F. Then proceed to iterate through all possible CPUs using
  the current task.
What is full-F? It would be good if you could clarify this in the commit message.
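If "full-F" just means an all-ones affinity mask, maybe spell that out. Purely for illustration (set_full_affinity() is a made-up name, not something in this patch; it only needs _GNU_SOURCE and <sched.h>, which this test file already includes, plus the bpf_num_possible_cpus() libbpf helper the test already uses), I read it as roughly:

	/* Give the calling task affinity to every possible CPU; this is my
	 * reading of "set the cpumask of your current task to full-F".
	 */
	static int set_full_affinity(void)
	{
		int i, nr_possible = bpf_num_possible_cpus();
		cpu_set_t set;

		CPU_ZERO(&set);
		for (i = 0; i < nr_possible; i++)
			CPU_SET(i, &set);

		/* pid 0 means the calling task */
		return sched_setaffinity(0, sizeof(set), &set);
	}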
- PID of a task with defined CPU affinity
  The aim here is to iterate through a specific cpumask. This scenario
  aligns with tasks residing within a cpuset cgroup.

- Invalid PID (e.g., PID -1)
  No cpumask is available in this case.

The result as follows,

  #65/1    cpumask_iter/init_pid:OK
  #65/2    cpumask_iter/invalid_pid:OK
  #65/3    cpumask_iter/self_pid_one_cpu:OK
  #65/4    cpumask_iter/self_pid_multi_cpus:OK
  #65      cpumask_iter:OK
  Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED

CONFIG_PSI=y is required for this testcase.

Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
---
 tools/testing/selftests/bpf/config            |   1 +
 .../selftests/bpf/prog_tests/cpumask_iter.c   | 134 ++++++++++++++++++
 .../selftests/bpf/progs/cpumask_common.h      |   3 +
 .../selftests/bpf/progs/test_cpumask_iter.c   |  56 ++++++++
 4 files changed, 194 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_cpumask_iter.c

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c125c441abc7..9c42568ed376 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -78,6 +78,7 @@ CONFIG_NF_CONNTRACK_MARK=y
 CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_NAT=y
+CONFIG_PSI=y
 CONFIG_RC_CORE=y
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
new file mode 100644
index 000000000000..984d01d09d79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask_iter.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@xxxxxxxxx> */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cpumask_iter.skel.h"
+
+static void verify_percpu_data(struct bpf_link *link, int nr_cpu_exp, int nr_running_exp)
+{
+	int iter_fd, len, item, nr_running, psi_running, nr_cpus;
+	static char buf[128];
why static?
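A plain automatic buffer should be enough for a test helper, e.g. (just a sketch):

	char buf[128] = {};	/* stack storage, zero-initialized, so the
				 * later memset(buf, 0, sizeof(buf)) could
				 * also be dropped
				 */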
+	size_t left;
+	char *p;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_GE(iter_fd, 0, "iter_fd"))
+		return;
+
+	memset(buf, 0, sizeof(buf));
+	left = ARRAY_SIZE(buf);
+	p = buf;
+	while ((len = read(iter_fd, p, left)) > 0) {
+		p += len;
+		left -= len;
+	}
+
+	item = sscanf(buf, "nr_running %u nr_cpus %u psi_running %u\n",
+		      &nr_running, &nr_cpus, &psi_running);
+	if (nr_cpu_exp == -1) {
+		ASSERT_EQ(item, -1, "seq_format");
+		goto out;
+	}
+
+	ASSERT_EQ(item, 3, "seq_format");
+	ASSERT_GE(nr_running, nr_running_exp, "nr_running");
+	ASSERT_GE(psi_running, nr_running_exp, "psi_running");
+	ASSERT_EQ(nr_cpus, nr_cpu_exp, "nr_cpus");
+
+	/* read() after iter finishes should be ok. */
+	if (len == 0)
+		ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
The above 'if' statement is irrelevant to the main purpose of this test and can be removed.
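With that dropped, the tail of verify_percpu_data() would just be the asserts plus cleanup, roughly (a sketch reusing the lines already in this hunk):

	ASSERT_EQ(item, 3, "seq_format");
	ASSERT_GE(nr_running, nr_running_exp, "nr_running");
	ASSERT_GE(psi_running, nr_running_exp, "psi_running");
	ASSERT_EQ(nr_cpus, nr_cpu_exp, "nr_cpus");

out:
	close(iter_fd);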
+
+out:
+	close(iter_fd);
+}
+
+void test_cpumask_iter(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	int nr_possible, cgrp_fd, pid, err, cnt, i;
+	struct test_cpumask_iter *skel = NULL;
= NULL is not needed.
+	union bpf_iter_link_info linfo;
+	int cpu_ids[] = {1, 3, 4, 5};
+	struct bpf_link *link;
+	cpu_set_t set;
+
+	skel = test_cpumask_iter__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_for_each_cpu__open_and_load"))
+		return;
+
+	if (setup_cgroup_environment())
+		goto destroy;
+
+	/* Utilize the cgroup iter */
+	cgrp_fd = get_root_cgroup();
+	if (!ASSERT_GE(cgrp_fd, 0, "create cgrp"))
+		goto cleanup;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.cgroup.cgroup_fd = cgrp_fd;
+	linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	link = bpf_program__attach_iter(skel->progs.cpu_cgroup, &opts);
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
+		goto close_fd;
+
+	skel->bss->target_pid = 1;
+	/* In case init task is set CPU affinity */
+	err = sched_getaffinity(1, sizeof(set), &set);
+	if (!ASSERT_OK(err, "setaffinity"))
+		goto close_fd;
goto free_link.
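i.e. something like the below, so the link that was just attached gets destroyed on this error path. The free_link label itself isn't visible in this hunk, so the cleanup ladder here is only my guess at how the labels line up with the gotos above:

	err = sched_getaffinity(1, sizeof(set), &set);
	if (!ASSERT_OK(err, "setaffinity"))
		goto free_link;

	/* ... subtests ... */

free_link:
	bpf_link__destroy(link);
close_fd:
	close(cgrp_fd);
cleanup:
	cleanup_cgroup_environment();
destroy:
	test_cpumask_iter__destroy(skel);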
+
+	cnt = CPU_COUNT(&set);
+	nr_possible = bpf_num_possible_cpus();
+	if (test__start_subtest("init_pid"))
+		/* curent task is running. */
+		verify_percpu_data(link, cnt, cnt == nr_possible ? 1 : 0);
[...]