On 10/08/2023 09:13, Chuyi Zhou wrote: > This patch adds a test which implements a priority-based policy through > bpf_oom_evaluate_task. > > The BPF program, oom_policy.c, compares the cgroup priority of two tasks > and select the lower one. The userspace program test_oom_policy.c > maintains a priority map by using cgroup id as the keys and priority as > the values. We could protect certain cgroups from oom-killer by setting > higher priority. > > Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> > --- > .../bpf/prog_tests/test_oom_policy.c | 140 ++++++++++++++++++ > .../testing/selftests/bpf/progs/oom_policy.c | 104 +++++++++++++ > 2 files changed, 244 insertions(+) > create mode 100644 tools/testing/selftests/bpf/prog_tests/test_oom_policy.c > create mode 100644 tools/testing/selftests/bpf/progs/oom_policy.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c b/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c > new file mode 100644 > index 000000000000..bea61ff22603 > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c > @@ -0,0 +1,140 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +#define _GNU_SOURCE > + > +#include <stdio.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <stdlib.h> > +#include <signal.h> > +#include <sys/stat.h> > +#include <test_progs.h> > +#include <bpf/btf.h> > +#include <bpf/bpf.h> > + > +#include "cgroup_helpers.h" > +#include "oom_policy.skel.h" > + > +static int map_fd; > +static int cg_nr; > +struct { > + const char *path; > + int fd; > + unsigned long long id; > +} cgs[] = { > + { "/cg1" }, > + { "/cg2" }, > +}; > + > + > +static struct oom_policy *open_load_oom_policy_skel(void) > +{ > + struct oom_policy *skel; > + int err; > + > + skel = oom_policy__open(); > + if (!ASSERT_OK_PTR(skel, "skel_open")) > + return NULL; > + > + err = oom_policy__load(skel); > + if (!ASSERT_OK(err, "skel_load")) > + goto cleanup; > + > + return skel; > + > +cleanup: > + oom_policy__destroy(skel); > + return NULL; > +} > + > +static void run_memory_consume(unsigned long long consume_size, int idx) > +{ > + char *buf; > + > + join_parent_cgroup(cgs[idx].path); > + buf = malloc(consume_size); > + memset(buf, 0, consume_size); > + sleep(2); > + exit(0); > +} > + > +static int set_cgroup_prio(unsigned long long cg_id, int prio) > +{ > + int err; > + > + err = bpf_map_update_elem(map_fd, &cg_id, &prio, BPF_ANY); > + ASSERT_EQ(err, 0, "update_map"); > + return err; > +} > + > +static int prepare_cgroup_environment(void) > +{ > + int err; > + > + err = setup_cgroup_environment(); > + if (err) > + goto clean_cg_env; > + for (int i = 0; i < cg_nr; i++) { > + err = cgs[i].fd = create_and_get_cgroup(cgs[i].path); > + if (!ASSERT_GE(cgs[i].fd, 0, "cg_create")) > + goto clean_cg_env; > + cgs[i].id = get_cgroup_id(cgs[i].path); > + } > + return 0; > +clean_cg_env: > + cleanup_cgroup_environment(); > + return err; > +} > + > +void test_oom_policy(void) > +{ > + struct oom_policy *skel; > + struct bpf_link *link; > + int err; > + int victim_pid; > + unsigned long long victim_cg_id; > + > + link = NULL; > + cg_nr = ARRAY_SIZE(cgs); > + > + skel = open_load_oom_policy_skel(); > + err = oom_policy__attach(skel); > + if (!ASSERT_OK(err, "oom_policy__attach")) > + goto cleanup; > + > + map_fd = bpf_object__find_map_fd_by_name(skel->obj, "cg_map"); > + if (!ASSERT_GE(map_fd, 0, "find map")) > + goto cleanup; > + > + err = prepare_cgroup_environment(); > + if (!ASSERT_EQ(err, 0, "prepare cgroup env")) > + goto cleanup; > + > + write_cgroup_file("/", "memory.max", "10M"); > + > + /* > + * Set higher priority to cg2 and lower to cg1, so we would select > + * task under cg1 as victim.(see oom_policy.c) > + */ > + set_cgroup_prio(cgs[0].id, 10); > + set_cgroup_prio(cgs[1].id, 50); > + > + victim_cg_id = cgs[0].id; > + victim_pid = fork(); > + > + if (victim_pid == 0) > + run_memory_consume(1024 * 1024 * 4, 0); > + > + if (fork() == 0) > + run_memory_consume(1024 * 1024 * 8, 1); > + > + while (wait(NULL) > 0) > + ; > + > + ASSERT_EQ(skel->bss->victim_pid, victim_pid, "victim_pid"); > + ASSERT_EQ(skel->bss->victim_cg_id, victim_cg_id, "victim_cgid"); > + ASSERT_EQ(skel->bss->failed_cnt, 1, "failed_cnt"); > +cleanup: > + bpf_link__destroy(link); > + oom_policy__destroy(skel); > + cleanup_cgroup_environment(); > +} > diff --git a/tools/testing/selftests/bpf/progs/oom_policy.c b/tools/testing/selftests/bpf/progs/oom_policy.c > new file mode 100644 > index 000000000000..fc9efc93914e > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/oom_policy.c > @@ -0,0 +1,104 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +#include <vmlinux.h> > +#include <bpf/bpf_tracing.h> > +#include <bpf/bpf_helpers.h> > + > +char _license[] SEC("license") = "GPL"; > + > +struct { > + __uint(type, BPF_MAP_TYPE_HASH); > + __type(key, int); > + __type(value, int); > + __uint(max_entries, 24); > +} cg_map SEC(".maps"); > + > +unsigned int victim_pid; > +u64 victim_cg_id; > +int failed_cnt; > + > +#define EOPNOTSUPP 95 > + > +enum { > + NO_BPF_POLICY, > + BPF_EVAL_ABORT, > + BPF_EVAL_NEXT, > + BPF_EVAL_SELECT, > +}; When I built a kernel using this series and tried building the associated test for that kernel I saw: progs/oom_policy.c:22:2: error: redefinition of enumerator 'NO_BPF_POLICY' NO_BPF_POLICY, ^ /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75894:2: note: previous definition is here NO_BPF_POLICY = 0, ^ progs/oom_policy.c:23:2: error: redefinition of enumerator 'BPF_EVAL_ABORT' BPF_EVAL_ABORT, ^ /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75895:2: note: previous definition is here BPF_EVAL_ABORT = 1, ^ progs/oom_policy.c:24:2: error: redefinition of enumerator 'BPF_EVAL_NEXT' BPF_EVAL_NEXT, ^ /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75896:2: note: previous definition is here BPF_EVAL_NEXT = 2, ^ progs/oom_policy.c: CLNG-BPF [test_maps] tailcall_bpf2bpf4.bpf.o 25:2: error: redefinition of enumerator 'BPF_EVAL_SELECT' BPF_EVAL_SELECT, ^ /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75897:2: note: previous definition is here BPF_EVAL_SELECT = 3, ^ 4 errors generated. So you shouldn't need the enum definition since it already makes it into vmlinux.h. I also ran into test failures when I removed the above (and compilation succeeded): test_oom_policy:PASS:prepare cgroup env 0 nsec (cgroup_helpers.c:130: errno: No such file or directory) Opening /mnt/cgroup-test-work-dir23054//memory.max set_cgroup_prio:PASS:update_map 0 nsec set_cgroup_prio:PASS:update_map 0 nsec test_oom_policy:FAIL:victim_pid unexpected victim_pid: actual 0 != expected 23058 test_oom_policy:FAIL:victim_cgid unexpected victim_cgid: actual 0 != expected 68 test_oom_policy:FAIL:failed_cnt unexpected failed_cnt: actual 0 != expected 1 #154 oom_policy:FAIL Summary: 1/0 PASSED, 0 SKIPPED, 1 FAILED So it seems that because my system was using the cgroupv1 memory controller, it could not be used for v2 unless I rebooted with systemd.unified_cgroup_hierarchy=1 ...on the boot commandline. It would be good to note any such requirements for this test in the selftests/bpf/README.rst. Might also be worth adding write_cgroup_file("", "cgroup.subtree_control", "+memory"); ...to ensure the memory controller is enabled for the root cgroup. At that point the test still failed: set_cgroup_prio:PASS:update_map 0 nsec test_oom_policy:FAIL:victim_pid unexpected victim_pid: actual 0 != expected 12649 test_oom_policy:FAIL:victim_cgid unexpected victim_cgid: actual 0 != expected 9583 test_oom_policy:FAIL:failed_cnt unexpected failed_cnt: actual 0 != expected 1 #154 oom_policy:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED Successfully unloaded bpf_testmod.ko. Are there other implicit assumptions about configuration that cause this test to fail perhaps? Alan