Hi David Vernet, Greeting! On 2022-04-22 at 10:33:52 -0700, David Vernet wrote: > test_cpu.c includes testcases that validate the cgroup cpu controller. > This patch adds a new testcase called test_cpucg_weight_overprovisioned() > that verifies the expected behavior of creating multiple processes with > different cpu.weight, on a system that is overprovisioned. > > So as to avoid code duplication, this patch also updates cpu_hog_func_param > to take a new hog_clock_type enum which informs how time is counted in > hog_cpus_timed() (either process time or wall clock time). > > Signed-off-by: David Vernet <void@xxxxxxxxxxxxx> > --- > tools/testing/selftests/cgroup/cgroup_util.c | 12 ++ > tools/testing/selftests/cgroup/cgroup_util.h | 1 + > tools/testing/selftests/cgroup/test_cpu.c | 135 ++++++++++++++++++- > 3 files changed, 145 insertions(+), 3 deletions(-) > Related commit in kernel: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6376b22cd0a3455a534b6921b816ffab68ddc48f Kernel: v6.4 ~ v6.9-rc5 I found test_cpu "test_cpucg_weight_overprovisioned" was failed on SPR(Sapphire Rapids) x86 server sometimes: " # ./test_cpu ok 1 test_cpucg_subtree_control ok 2 test_cpucg_stats not ok 3 test_cpucg_weight_overprovisioned ok 4 test_cpucg_weight_underprovisioned ok 5 test_cpucg_nested_weight_overprovisioned ok 6 test_cpucg_nested_weight_underprovisioned ok 7 test_cpucg_max ok 8 test_cpucg_max_nested " If I changed the "struct child children[3] = {NULL};" to "struct child children[1] = {NULL};", test_cpu case "test_cpucg_weight_overprovisioned" will not failed on SPR. I'm not familar with cgroup and does above change make sence, could you take a look for the test_cpu failed sometimes issue if you have time. Best Regards, Thanks a lot! > diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c > index 0cf7e90c0052..b690fdc8b4cd 100644 > --- a/tools/testing/selftests/cgroup/cgroup_util.c > +++ b/tools/testing/selftests/cgroup/cgroup_util.c > @@ -190,6 +190,18 @@ int cg_write(const char *cgroup, const char *control, char *buf) > return -1; > } > > +int cg_write_numeric(const char *cgroup, const char *control, long value) > +{ > + char buf[64]; > + int ret; > + > + ret = sprintf(buf, "%lu", value); > + if (ret < 0) > + return ret; > + > + return cg_write(cgroup, control, buf); > +} > + > int cg_find_unified_root(char *root, size_t len) > { > char buf[10 * PAGE_SIZE]; > diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h > index 1df13dc8b8aa..0f79156697cf 100644 > --- a/tools/testing/selftests/cgroup/cgroup_util.h > +++ b/tools/testing/selftests/cgroup/cgroup_util.h > @@ -35,6 +35,7 @@ extern long cg_read_long(const char *cgroup, const char *control); > long cg_read_key_long(const char *cgroup, const char *control, const char *key); > extern long cg_read_lc(const char *cgroup, const char *control); > extern int cg_write(const char *cgroup, const char *control, char *buf); > +int cg_write_numeric(const char *cgroup, const char *control, long value); > extern int cg_run(const char *cgroup, > int (*fn)(const char *cgroup, void *arg), > void *arg); > diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c > index 3bd61964a262..8d901c06c79d 100644 > --- a/tools/testing/selftests/cgroup/test_cpu.c > +++ b/tools/testing/selftests/cgroup/test_cpu.c > @@ -2,6 +2,8 @@ > > #define _GNU_SOURCE > #include <linux/limits.h> > +#include <sys/sysinfo.h> > +#include <sys/wait.h> > #include <errno.h> > #include <pthread.h> > #include <stdio.h> > @@ -10,9 +12,17 @@ > #include "../kselftest.h" > #include "cgroup_util.h" > > +enum hog_clock_type { > + // Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock. > + CPU_HOG_CLOCK_PROCESS, > + // Count elapsed time using system wallclock time. > + CPU_HOG_CLOCK_WALL, > +}; > + > struct cpu_hog_func_param { > int nprocs; > struct timespec ts; > + enum hog_clock_type clock_type; > }; > > /* > @@ -118,8 +128,13 @@ static int hog_cpus_timed(const char *cgroup, void *arg) > (struct cpu_hog_func_param *)arg; > struct timespec ts_run = param->ts; > struct timespec ts_remaining = ts_run; > + struct timespec ts_start; > int i, ret; > > + ret = clock_gettime(CLOCK_MONOTONIC, &ts_start); > + if (ret != 0) > + return ret; > + > for (i = 0; i < param->nprocs; i++) { > pthread_t tid; > > @@ -135,9 +150,19 @@ static int hog_cpus_timed(const char *cgroup, void *arg) > if (ret && errno != EINTR) > return ret; > > - ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total); > - if (ret != 0) > - return ret; > + if (param->clock_type == CPU_HOG_CLOCK_PROCESS) { > + ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total); > + if (ret != 0) > + return ret; > + } else { > + struct timespec ts_current; > + > + ret = clock_gettime(CLOCK_MONOTONIC, &ts_current); > + if (ret != 0) > + return ret; > + > + ts_total = timespec_sub(&ts_current, &ts_start); > + } > > ts_remaining = timespec_sub(&ts_run, &ts_total); > } > @@ -176,6 +201,7 @@ static int test_cpucg_stats(const char *root) > .tv_sec = usage_seconds, > .tv_nsec = 0, > }, > + .clock_type = CPU_HOG_CLOCK_PROCESS, > }; > if (cg_run(cpucg, hog_cpus_timed, (void *)¶m)) > goto cleanup; > @@ -197,6 +223,108 @@ static int test_cpucg_stats(const char *root) > return ret; > } > > +/* > + * First, this test creates the following hierarchy: > + * A > + * A/B cpu.weight = 50 > + * A/C cpu.weight = 100 > + * A/D cpu.weight = 150 > + * > + * A separate process is then created for each child cgroup which spawns as > + * many threads as there are cores, and hogs each CPU as much as possible > + * for some time interval. > + * > + * Once all of the children have exited, we verify that each child cgroup > + * was given proportional runtime as informed by their cpu.weight. > + */ > +static int test_cpucg_weight_overprovisioned(const char *root) > +{ > + struct child { > + char *cgroup; > + pid_t pid; > + long usage; > + }; > + int ret = KSFT_FAIL, i; > + char *parent = NULL; > + struct child children[3] = {NULL}; > + long usage_seconds = 10; > + > + parent = cg_name(root, "cpucg_test_0"); > + if (!parent) > + goto cleanup; > + > + if (cg_create(parent)) > + goto cleanup; > + > + if (cg_write(parent, "cgroup.subtree_control", "+cpu")) > + goto cleanup; > + > + for (i = 0; i < ARRAY_SIZE(children); i++) { > + children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i); > + if (!children[i].cgroup) > + goto cleanup; > + > + if (cg_create(children[i].cgroup)) > + goto cleanup; > + > + if (cg_write_numeric(children[i].cgroup, "cpu.weight", > + 50 * (i + 1))) > + goto cleanup; > + } > + > + for (i = 0; i < ARRAY_SIZE(children); i++) { > + struct cpu_hog_func_param param = { > + .nprocs = get_nprocs(), > + .ts = { > + .tv_sec = usage_seconds, > + .tv_nsec = 0, > + }, > + .clock_type = CPU_HOG_CLOCK_WALL, > + }; > + pid_t pid = cg_run_nowait(children[i].cgroup, hog_cpus_timed, > + (void *)¶m); > + if (pid <= 0) > + goto cleanup; > + children[i].pid = pid; > + } > + > + for (i = 0; i < ARRAY_SIZE(children); i++) { > + int retcode; > + > + waitpid(children[i].pid, &retcode, 0); > + if (!WIFEXITED(retcode)) > + goto cleanup; > + if (WEXITSTATUS(retcode)) > + goto cleanup; > + } > + > + for (i = 0; i < ARRAY_SIZE(children); i++) > + children[i].usage = cg_read_key_long(children[i].cgroup, > + "cpu.stat", "usage_usec"); > + > + for (i = 0; i < ARRAY_SIZE(children) - 1; i++) { > + long delta; > + > + if (children[i + 1].usage <= children[i].usage) > + goto cleanup; > + > + delta = children[i + 1].usage - children[i].usage; > + if (!values_close(delta, children[0].usage, 35)) > + goto cleanup; > + } > + > + ret = KSFT_PASS; > +cleanup: > + for (i = 0; i < ARRAY_SIZE(children); i++) { > + cg_destroy(children[i].cgroup); > + free(children[i].cgroup); > + } > + cg_destroy(parent); > + free(parent); > + > + return ret; > +} > + > #define T(x) { x, #x } > struct cpucg_test { > int (*fn)(const char *root); > @@ -204,6 +332,7 @@ struct cpucg_test { > } tests[] = { > T(test_cpucg_subtree_control), > T(test_cpucg_stats), > + T(test_cpucg_weight_overprovisioned), > }; > #undef T > > -- > 2.30.2 >