On Wed, Sep 02, 2020 at 05:15:06PM +0530, Pratik Rajesh Sampat wrote: > Measure cpuidle latencies on wakeup to determine and compare with the > advertsied wakeup latencies for each idle state. > > Cpuidle wakeup latencies are determined for IPIs and Timer events and > can help determine any deviations from what is advertsied by the > hardware. > > A baseline measurement for each case of IPI and timers is taken at > 100 percent CPU usage to quantify for the kernel-userpsace overhead > during execution. > > Signed-off-by: Pratik Rajesh Sampat <psampat@xxxxxxxxxxxxx> > --- > tools/testing/selftests/Makefile | 1 + > tools/testing/selftests/cpuidle/Makefile | 7 + > tools/testing/selftests/cpuidle/cpuidle.c | 616 ++++++++++++++++++++++ > tools/testing/selftests/cpuidle/settings | 1 + > 4 files changed, 625 insertions(+) > create mode 100644 tools/testing/selftests/cpuidle/Makefile > create mode 100644 tools/testing/selftests/cpuidle/cpuidle.c > create mode 100644 tools/testing/selftests/cpuidle/settings > > diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile > index 9018f45d631d..2bb0e87f76fd 100644 > --- a/tools/testing/selftests/Makefile > +++ b/tools/testing/selftests/Makefile > @@ -8,6 +8,7 @@ TARGETS += cgroup > TARGETS += clone3 > TARGETS += core > TARGETS += cpufreq > +TARGETS += cpuidle > TARGETS += cpu-hotplug > TARGETS += drivers/dma-buf > TARGETS += efivarfs > diff --git a/tools/testing/selftests/cpuidle/Makefile b/tools/testing/selftests/cpuidle/Makefile > new file mode 100644 > index 000000000000..d332485e1bc5 > --- /dev/null > +++ b/tools/testing/selftests/cpuidle/Makefile > @@ -0,0 +1,7 @@ > +# SPDX-License-Identifier: GPL-2.0 > +TEST_GEN_PROGS := cpuidle > + > +CFLAGS += -O2 > +LDLIBS += -lpthread > + > +include ../lib.mk > diff --git a/tools/testing/selftests/cpuidle/cpuidle.c b/tools/testing/selftests/cpuidle/cpuidle.c > new file mode 100644 > index 000000000000..4b1e7a91f75c > --- /dev/null > +++ b/tools/testing/selftests/cpuidle/cpuidle.c > @@ -0,0 +1,616 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Cpuidle latency measurement microbenchmark > + * > + * A mechanism to measure wakeup latency for IPI and Timer based interrupts > + * Results of this microbenchmark can be used to check and validate against the > + * advertised latencies for each cpuidle state > + * > + * IPIs (using pipes) and Timers are used to wake the CPU up and measure the > + * time difference > + * > + * Usage: > + * ./cpuidle --mode <full / quick / num_cpus> --output <output location> > + * > + * Copyright (C) 2020 Pratik Rajesh Sampat <psampat@xxxxxxxxxxxxx>, IBM > + */ > + > +#define _GNU_SOURCE > +#include <assert.h> > +#include <dirent.h> > +#include <fcntl.h> > +#include <getopt.h> > +#include <inttypes.h> > +#include <limits.h> > +#include <pthread.h> > +#include <sched.h> > +#include <signal.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <sys/time.h> > +#include <unistd.h> > + > +#define READ 0 > +#define WRITE 1 > +#define TIMEOUT_US 500000 > + > +static int pipe_fd[2]; > +static int *cpu_list; > +static int cpus; > +static int idle_states; > +static uint64_t *latency_list; > +static uint64_t *residency_list; > + > +static char *log_file = "cpuidle.log"; > + > +static int get_online_cpus(int *online_cpu_list, int total_cpus) > +{ > + char filename[80]; > + int i, index = 0; > + FILE *fptr; > + > + for (i = 0; i < total_cpus; i++) { > + char status; > + > + sprintf(filename, "/sys/devices/system/cpu/cpu"); > + sprintf(filename + strlen(filename), "%d%s", i, "/online"); > + fptr = fopen(filename, "r"); > + if (!fptr) > + continue; > + assert(fscanf(fptr, "%c", &status) != EOF); > + if (status == '1') > + online_cpu_list[index++] = i; > + fclose(fptr); > + } > + return index; > +} > + > +static uint64_t us_to_ns(uint64_t val) > +{ > + return val * 1000; > +} > + > +static void get_latency(int cpu) > +{ > + char filename[80]; > + uint64_t latency; > + FILE *fptr; > + int state; > + > + for (state = 0; state < idle_states; state++) { > + sprintf(filename, "%s%d%s%d%s", "/sys/devices/system/cpu/cpu", > + cpu, "/cpuidle/state", > + state, "/latency"); > + fptr = fopen(filename, "r"); > + assert(fptr); > + > + assert(fscanf(fptr, "%ld", &latency) != EOF); > + latency_list[state] = latency; > + fclose(fptr); > + } > +} > + > +static void get_residency(int cpu) > +{ > + uint64_t residency; > + char filename[80]; > + FILE *fptr; > + int state; > + > + for (state = 0; state < idle_states; state++) { > + sprintf(filename, "%s%d%s%d%s", "/sys/devices/system/cpu/cpu", > + cpu, "/cpuidle/state", > + state, "/residency"); > + fptr = fopen(filename, "r"); > + assert(fptr); > + > + assert(fscanf(fptr, "%ld", &residency) != EOF); > + residency_list[state] = residency; > + fclose(fptr); > + } > +} > + > +static int get_idle_state_count(int cpu) > +{ > + struct dirent *entry; > + int dir_count = 0; > + char filename[80]; > + DIR *dirp; > + > + sprintf(filename, "%s%d%s", "/sys/devices/system/cpu/cpu", > + cpu, "/cpuidle"); > + > + dirp = opendir(filename); > + if (!dirp) > + return -1; > + while (entry = readdir(dirp)) { > + if (entry->d_type == DT_DIR) { > + if (strcmp(entry->d_name, ".") == 0 || > + strcmp(entry->d_name, "..") == 0) > + continue; > + dir_count++; > + } > + } > + closedir(dirp); > + return dir_count; > +} > + > +/* Enable or disable all idle states */ > +static int state_all_idle(char *disable) > +{ > + char filename[80]; > + FILE *fptr; > + int i, j; > + > + for (i = 0; i < cpus; i++) { > + for (j = 0; j < idle_states; j++) { > + sprintf(filename, "%s%d%s%d%s", > + "/sys/devices/system/cpu/cpu", cpu_list[i], > + "/cpuidle/state", j, "/disable"); > + fptr = fopen(filename, "w"); > + assert(fptr); > + fprintf(fptr, "%s", disable); > + fclose(fptr); > + } > + } > + return 0; > +} > + > +/* Disable all idle states */ > +static int cpuidle_disable_all_states(void) > +{ > + return state_all_idle("1"); > +} > + > +static int cpuidle_enable_all_states(void) > +{ > + return state_all_idle("0"); > +} > + > +static int state_operation(char *disable, int state) > +{ > + char filename[80]; > + FILE *fptr; > + int i; > + > + for (i = 0; i < cpus; i++) { > + sprintf(filename, "%s%d%s%d%s", "/sys/devices/system/cpu/cpu", > + cpu_list[i], "/cpuidle/state", state, "/disable"); > + fptr = fopen(filename, "w"); > + assert(fptr); > + fprintf(fptr, "%s", disable); > + fclose(fptr); > + } > + return 0; > +} > + > +static int cpuidle_enable_state(int state) > +{ > + return state_operation("0", state); > +} > + > +static int cpuidle_disable_state(int state) > +{ > + return state_operation("1", state); > +} > + > +static uint64_t average(uint64_t *arr, int size) > +{ > + int i, sum = 0; > + > + assert(size != 0); > + for (i = 0; i < size; i++) > + sum += arr[i]; > + return sum / size; > +} > + > +static pthread_t start_thread_on(void *(*fn)(void *), void *arg, uint64_t cpu) > +{ > + pthread_attr_t attr; > + cpu_set_t cpuset; > + pthread_t tid; > + int rc; > + > + CPU_ZERO(&cpuset); > + CPU_SET(cpu, &cpuset); > + > + rc = pthread_attr_init(&attr); > + if (rc) { > + perror("pthread_attr_init"); > + exit(1); > + } > + > + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); > + if (rc) { > + perror("pthread_attr_setaffinity_np"); > + exit(1); > + } > + > + rc = pthread_create(&tid, &attr, fn, arg); > + if (rc) { > + perror("pthread_create"); > + exit(1); > + } > + return tid; > +} > + > +void *util_full_cpu(void *unused) > +{ > + FILE *fptr; > + > + fptr = fopen("/dev/null", "w"); > + assert(fptr); > + while (1) { > + fprintf(fptr, "0"); > + nanosleep((const struct timespec[]){{0, 10L}}, NULL); It is not clear why the nanosleep is needed. Perhaps you want the equivalent of cpu_relax() in the linux kernel ? > + } > + fclose(fptr); > +} > + > +/* IPI based wakeup latencies */ > +struct latency { > + unsigned int src_cpu; > + unsigned int dest_cpu; > + struct timespec time_start; > + struct timespec time_end; > + uint64_t latency_ns; > +} ipi_wakeup; > + > +static void *writer(void *unused) > +{ > + signed char c = 'P'; > + > + assert(write(pipe_fd[WRITE], &c, 1) == 1); > + ipi_wakeup.src_cpu = sched_getcpu(); > + > + return NULL; > +} > + > +static void *reader(void *unused) > +{ > + signed char c; > + > + assert(read(pipe_fd[READ], &c, 1) == 1); > + ipi_wakeup.dest_cpu = sched_getcpu(); > + > + return NULL; > +} > + > +static void ipi_test_once(int baseline, int src_cpu, int dest_cpu) > +{ > + pthread_t tid, tid1, baseline_tid; > + > + if (baseline) { > + baseline_tid = start_thread_on(util_full_cpu, NULL, dest_cpu); > + /* Run process for long enough to gain 100% usage*/ > + sleep(2); > + } > + > + clock_gettime(CLOCK_REALTIME, &ipi_wakeup.time_start); > + If we capture the time_start here, it would also include the time required to create a thread and run it on src_cpu. Shouldn't we move clock_gettime(time_start) inside the writer, just before the write(pipe) and another clock_gettime(time_end) in the reader soon after the read(pipe) returns ? > + tid = start_thread_on(writer, NULL, src_cpu); > + pthread_join(tid, NULL); > + tid1 = start_thread_on(reader, NULL, dest_cpu); > + pthread_join(tid1, NULL); > + > + clock_gettime(CLOCK_REALTIME, &ipi_wakeup.time_end); > + ipi_wakeup.latency_ns = (ipi_wakeup.time_end.tv_sec - > + ipi_wakeup.time_start.tv_sec) * 1000000000ULL + > + ipi_wakeup.time_end.tv_nsec - ipi_wakeup.time_start.tv_nsec; > + > + if (baseline) > + pthread_cancel(baseline_tid); > +} > + > +static void ipi_test(int src_cpu) > +{ > + uint64_t avg_arr[cpus], avg_latency; > + int cpu, state; > + FILE *fptr; > + > + assert(cpuidle_disable_all_states() == 0); > + > + if (pipe(pipe_fd)) > + exit(1); > + > + fptr = fopen(log_file, "a"); > + fprintf(fptr, "----IPI TEST----\n"); > + > + fprintf(fptr, "----Baseline IPI Latency----\n"); > + fprintf(fptr, "%s %10s %18s\n", "SRC_CPU", "DEST_CPU", > + "Baseline_latency(ns)"); > + /* Run the test as dummy once to stablize */ > + ipi_test_once(1, src_cpu, cpu_list[0]); > + for (cpu = 0; cpu < cpus; cpu++) { > + ipi_test_once(1, src_cpu, cpu_list[cpu]); > + fprintf(fptr, "%3d %10d %12ld\n", ipi_wakeup.src_cpu, > + ipi_wakeup.dest_cpu, > + ipi_wakeup.latency_ns); > + avg_arr[cpu] = ipi_wakeup.latency_ns; > + } > + avg_latency = average(avg_arr, cpus); > + fprintf(fptr, "Baseline average IPI latency(ns): %ld\n\n", avg_latency); > + > + for (state = 0; state < idle_states; state++) { > + fprintf(fptr, "--Enabling state: %d--\n", state); > + assert(cpuidle_enable_state(state) == 0); > + fprintf(fptr, "%s %10s %18s\n", "SRC_CPU", "DEST_CPU", > + "IPI_Latency(ns)"); > + for (cpu = 0; cpu < cpus; cpu++) { > + /* Allow sufficient cycles to go idle */ > + sleep(1); > + ipi_test_once(0, src_cpu, cpu_list[cpu]); > + fprintf(fptr, "%3d %10d %18ld\n", ipi_wakeup.src_cpu, > + ipi_wakeup.dest_cpu, > + ipi_wakeup.latency_ns); > + avg_arr[cpu] = ipi_wakeup.latency_ns; > + } > + fprintf(fptr, "Expected Latency(ns): %ld\n", > + us_to_ns(latency_list[state])); > + avg_latency = average(avg_arr, cpus); > + fprintf(fptr, "Observed Average IPI latency(ns): %ld\n\n", > + avg_latency); > + assert(cpuidle_disable_state(state) == 0); > + } > + > + assert(cpuidle_enable_all_states() == 0); > + fclose(fptr); > +} > + > +/* Timer based wakeup latencies */ > +static int soak_done; > +struct timer_data { > + unsigned int src_cpu; > + uint64_t timeout; > + struct timespec time_start; > + struct timespec time_end; > + uint64_t timeout_diff_ns; > +} timer_wakeup; > + > +void catch_alarm(int sig) > +{ > + soak_done = 1; > +} > + > +static void setup_timer(void) > +{ > + struct itimerval timer_settings = {}; > + int err; > + > + timer_settings.it_value.tv_usec = timer_wakeup.timeout; > + err = setitimer(ITIMER_REAL, &timer_settings, NULL); > + if (err < 0) { > + perror("failed to arm interval timer"); > + exit(1); > + } > + signal(SIGALRM, catch_alarm); > + while (!soak_done) > + sleep(1); > +} > + > +static void *queue_timer(void *timeout) > +{ > + timer_wakeup.src_cpu = sched_getcpu(); > + timer_wakeup.timeout = (uint64_t)timeout; > + setup_timer(); > + > + return NULL; > +} > + > +static void timeout_test_once(int baseline, uint64_t timeout, int dest_cpu) > +{ > + pthread_t tid, baseline_tid; > + > + if (baseline) { > + baseline_tid = start_thread_on(util_full_cpu, NULL, dest_cpu); > + /* Run process for long enough to gain 100% usage */ > + sleep(2); > + } > + > + clock_gettime(CLOCK_REALTIME, &timer_wakeup.time_start); > + Here too, should we move the clock_gettime(time_start) in setup_timer() just before calling setittimer() and clockgettime(time_end) in catch_alarm() ? > + tid = start_thread_on(queue_timer, (void *)timeout, dest_cpu); > + pthread_join(tid, NULL); > + > + clock_gettime(CLOCK_REALTIME, &timer_wakeup.time_end); > + timer_wakeup.timeout_diff_ns = (timer_wakeup.time_end.tv_sec - > + timer_wakeup.time_start.tv_sec) * 1000000000ULL + > + (timer_wakeup.time_end.tv_nsec - timer_wakeup.time_start.tv_nsec); > + if (baseline) > + pthread_cancel(baseline_tid); > +} > + > +static void timeout_test(unsigned long timeout) > +{ > + uint64_t avg_arr[cpus], avg_timeout_diff; > + int state, cpu; > + FILE *fptr; > + > + assert(cpuidle_disable_all_states() == 0); > + fptr = fopen(log_file, "a"); > + fprintf(fptr, "----TIMEOUT TEST----\n"); > + > + fprintf(fptr, "----Baseline Timeout Latency diff----\n"); > + fprintf(fptr, "%s %10s\n", "SRC_CPU", "Baseline_Latency(ns)"); > + /* Run the test as dummy once to stablize */ > + timeout_test_once(1, timeout, cpu_list[0]); > + for (cpu = 0; cpu < cpus; cpu++) { > + timeout_test_once(1, timeout, cpu_list[cpu]); > + fprintf(fptr, "%3d %11ld\n", timer_wakeup.src_cpu, > + timer_wakeup.timeout_diff_ns); > + avg_arr[cpu] = timer_wakeup.timeout_diff_ns; > + } > + avg_timeout_diff = average(avg_arr, cpus); > + fprintf(fptr, "Baseline Average Timeout diff(ns): %ld\n\n", > + avg_timeout_diff); > + > + for (state = 0; state < idle_states; state++) { > + fprintf(fptr, "--Enabling state: %d--\n", state); > + assert(cpuidle_enable_state(state) == 0); > + fprintf(fptr, "%s %11s\n", "SRC_CPU", "Timeout_Latency(ns)"); > + for (cpu = 0; cpu < cpus; cpu++) { > + /* Allow sufficient cycles to go idle */ > + sleep(1); > + timeout_test_once(0, timeout, > + cpu_list[cpu]); > + fprintf(fptr, "%3d %15ld\n", timer_wakeup.src_cpu, > + timer_wakeup.timeout_diff_ns); > + avg_arr[cpu] = timer_wakeup.timeout_diff_ns; > + } > + fprintf(fptr, "Expected Residency(ns): %ld\n", > + us_to_ns(residency_list[state])); > + avg_timeout_diff = average(avg_arr, cpus); > + fprintf(fptr, "Observed Average Timeout diff(ns): %ld\n\n", > + avg_timeout_diff); > + assert(cpuidle_disable_state(state) == 0); > + } > + assert(cpuidle_enable_all_states() == 0); > + fclose(fptr); > +} > + > +static struct option options[] = { > + {"output", required_argument, 0, 'o'}, > + {"mode", required_argument, 0, 'm'}, > +}; > + > +static void usage(void) > +{ > + fprintf(stderr, "Usage: cpuidle <options>\n\n"); > + fprintf(stderr, "\t\t--mode=X\t(quick / full / <num_cpus>)\n"); > + fprintf(stderr, "\t\t--output=X\tOutput loc (def: cpuidle.log)\n\n"); > +} > + > +void get_n_random_cpus(int *shf_list, int shf_size, int *list, int n) > +{ > + /* Shuffle the Online CPU list */ > + int i; > + int shuffle_index_list[shf_size]; > + > + for (i = 0; i < shf_size; i++) > + shuffle_index_list[i] = i; > + for (i = 0; i < shf_size; i++) { > + int idx = i + rand() % (shf_size - i); > + int temp = shuffle_index_list[i]; > + > + shuffle_index_list[i] = shuffle_index_list[idx]; > + shuffle_index_list[idx] = temp; > + } > + > + /* Pick the first n from the shf_list elements */ > + for (i = 0; i < n; i++) > + list[i] = shf_list[shuffle_index_list[i]]; > +} > + > +int main(int argc, char *argv[]) > +{ > + int total_cpus, online_cpus, option_index = 0; > + int *online_cpu_list; > + signed char c; > + FILE *fptr; > + > + if (getuid()) { > + fprintf(stderr, "cpuidle latency test must run as root\n"); > + exit(1); > + } > + > + total_cpus = sysconf(_SC_NPROCESSORS_ONLN); > + online_cpu_list = malloc(total_cpus * sizeof(int)); > + if (!online_cpu_list) { > + perror("Malloc failure"); > + exit(1); > + } > + > + online_cpus = get_online_cpus(&online_cpu_list[0], total_cpus); > + if (!online_cpus) { > + perror("Unable to get online CPUS"); > + exit(1); > + } > + > + /* Get one CPU for a quick test */ > + cpus = 1; > + cpu_list = malloc(1 * sizeof(int)); > + srand(time(NULL)); > + cpu_list[0] = online_cpu_list[(rand() % online_cpus) + 1]; > + > + while (1) { > + c = getopt_long(argc, argv, "", options, &option_index); > + if (c == -1) > + break; > + > + switch (c) { > + case 'o': > + log_file = optarg; > + break; > + case 'm': > + if (!strcmp(optarg, "full")) { > + cpu_list = realloc(cpu_list, > + online_cpus * sizeof(int)); > + memcpy(cpu_list, online_cpu_list, > + online_cpus * sizeof(int)); > + cpus = online_cpus; > + } else if (strcmp(optarg, "quick")) { > + int opt_cpus; > + > + opt_cpus = atoi(optarg); > + if (!opt_cpus) { > + fprintf(stderr, "Error parsing mode\n"); > + usage(); > + exit(1); > + } > + if (opt_cpus > online_cpus) { > + fprintf(stderr, "Number of CPUS > Online CPUs\n"); > + usage(); > + exit(1); > + } > + cpu_list = realloc(cpu_list, > + opt_cpus * sizeof(int)); > + get_n_random_cpus(online_cpu_list, online_cpus, > + &cpu_list[0], opt_cpus); > + cpus = opt_cpus; > + } > + break; > + default: > + usage(); > + exit(1); > + } > + } > + > + idle_states = get_idle_state_count(online_cpu_list[0]); > + if (idle_states == -1) { > + perror("Unable to get idle states"); > + exit(1); > + } > + > + fptr = fopen(log_file, "w+"); > + fprintf(fptr, "cpuidle latency selftests. IPI & Timers\n"); > + fprintf(fptr, "Number of CPUS: %d\n", total_cpus); > + fprintf(fptr, "Number of idle states: %d\n", idle_states); > + fclose(fptr); > + > + latency_list = malloc(idle_states * sizeof(uint64_t)); > + residency_list = malloc(idle_states * sizeof(uint64_t)); > + if (!latency_list || !residency_list) { > + perror("Malloc failure"); > + exit(1); > + } > + > + get_latency(online_cpu_list[0]); > + get_residency(online_cpu_list[0]); > + > + ipi_test(online_cpu_list[0]); > + printf("IPI test done\n"); > + fflush(stdout); > + > + timeout_test(TIMEOUT_US); > + printf("Timeout test done\n"); > + fflush(stdout); > + > + printf("Output logged at: %s\n", log_file); > + > + free(latency_list); > + free(residency_list); > + free(cpu_list); > + free(online_cpu_list); > + return 0; > +} > diff --git a/tools/testing/selftests/cpuidle/settings b/tools/testing/selftests/cpuidle/settings > new file mode 100644 > index 000000000000..e7b9417537fb > --- /dev/null > +++ b/tools/testing/selftests/cpuidle/settings > @@ -0,0 +1 @@ > +timeout=0 > -- > 2.26.2 >