oslat was initially a standalone program [1].  This patch merges oslat into
the rt-tests repo.  It is a direct port of oslat v0.1.7, which naturally bumps
its version to the latest rt-tests version.

[1] https://github.com/xzpeter/oslat

Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
 .gitignore        |   1 +
 Makefile          |  10 +-
 src/oslat/oslat.8 |  66 ++++
 src/oslat/oslat.c | 896 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 971 insertions(+), 2 deletions(-)
 create mode 100644 src/oslat/oslat.8
 create mode 100644 src/oslat/oslat.c

diff --git a/.gitignore b/.gitignore
index bc01575..a975c4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,7 @@ SRPMS
 /queuelat
 /ssdd
 /get_cyclictest_snapshot
+/oslat
 
 rt-tests.spec
 tags
diff --git a/Makefile b/Makefile
index be78312..3f59efb 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,8 @@ sources = cyclictest.c \
 	  cyclicdeadline.c \
 	  deadline_test.c \
 	  queuelat.c \
-	  ssdd.c
+	  ssdd.c \
+	  oslat.c
 
 TARGETS = $(sources:.c=)
 LIBS = -lrt -lpthread
@@ -48,7 +49,8 @@ MANPAGES = src/cyclictest/cyclictest.8 \
 	  src/sched_deadline/deadline_test.8 \
 	  src/ssdd/ssdd.8 \
 	  src/sched_deadline/cyclicdeadline.8 \
-	  src/cyclictest/get_cyclictest_snapshot.8
+	  src/cyclictest/get_cyclictest_snapshot.8 \
+	  src/oslat/oslat.8
 
 ifdef PYLIB
 	MANPAGES += src/hwlatdetect/hwlatdetect.8
@@ -97,6 +99,7 @@ VPATH += src/hackbench:
 VPATH += src/sched_deadline:
 VPATH += src/queuelat:
 VPATH += src/ssdd:
+VPATH += src/oslat:
 
 $(OBJDIR)/%.o: %.c | $(OBJDIR)
 	$(CC) -D VERSION=$(VERSION) -c $< $(CFLAGS) $(CPPFLAGS) -o $@
@@ -164,6 +167,9 @@ queuelat: $(OBJDIR)/queuelat.o $(OBJDIR)/librttest.a
 ssdd: $(OBJDIR)/ssdd.o $(OBJDIR)/librttest.a
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB)
 
+oslat: $(OBJDIR)/oslat.o $(OBJDIR)/librttest.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB) $(NUMA_LIBS)
+
 %.8.gz: %.8
 	gzip -nc $< > $@
 
diff --git a/src/oslat/oslat.8 b/src/oslat/oslat.8
new file mode 100644
index 0000000..83257c6
--- /dev/null
+++ b/src/oslat/oslat.8
@@ -0,0 +1,66 @@
+.TH OSLAT 8 "August 17, 2020"
+.\" for manpage-specific macros, see man(7)
+.SH NAME
+oslat \- OS Latency Detector
+.SH SYNOPSIS
+.SY oslat
+.RI "[ \-shvz ] [ \-b " bucket-size " ] [ \-B " bias " ] [ \-c " cpu-list " ] \
+[ \-C " cpu-main-thread " ] [ \-f " rt-prio " ] [ \-m " workload-mem " ] \
+[ \-t " runtime " ] [ \-T " trace-threshold " ] [ \-w " workload " ]"
+.SH DESCRIPTION
+.B oslat
+is an open source userspace polling-mode stress program for detecting OS-level
+latency.  It runs a busy loop, with or without an additional workload, and
+frequently reads the TSC to measure the time spent in each loop iteration.
+.SH OPTIONS
+.TP
+.B \-b, \-\-bucket-size=N
+Specify the number of buckets (4-1024).
+.TP
+.B \-B, \-\-bias=USEC
+Add a bias to all the buckets using the estimated minimum.
+.TP
+.B \-c, \-\-cpu-list=CPULIST
+Specify the CPUs to run on, for example, '1,3,5,7-15'.
+.TP
+.B \-C, \-\-cpu-main-thread=CORE
+Specify which CPU the main thread runs on.  Default is cpu0.
+.TP
+.B \-f, \-\-rtprio=PRIORITY
+Run the measurement threads at the given SCHED_FIFO priority (1-99).
+Otherwise the default scheduling policy is used, which is normally SCHED_OTHER.
+.TP
+.B \-m, \-\-workload-mem=SIZE
+Size of the memory to use for the workload (e.g., 4K, 1M).
+Total memory usage will be this value multiplied by 2*N,
+because a source and a destination buffer are allocated per thread,
+where N is the number of CPUs under test.
+.TP
+.B \-t, \-\-runtime=SEC
+Specify the test duration, e.g., 60, 20m, 2H (m/M: minutes, h/H: hours,
+d/D: days).  The default unit is seconds.
+.TP
+.B \-T, \-\-trace-threshold=THRESHOLD
+Stop the test when a latency above THRESHOLD (in usec) is detected.  At the
+same time, print a marker into ftrace and stop ftrace as well.
+.TP
+.B \-w, \-\-workload=WORKLOAD
+Specify the kind of workload; the default is no workload. Options: "no", "memmove".
+.TP
+.B \-s, \-\-single-preheat
+Use a single thread when measuring latency at the pre-heat stage.
+NOTE: please make sure the CPU frequency on all testing cores
+is locked before using this parameter.  If you don't know how
+to lock the frequency, then please don't use this parameter.
+.TP
+.B \-h, \-\-help
+Show the help message.
+.TP
+.B \-v, \-\-version
+Show the version of the program.
+.TP
+.B \-z, \-\-zero-omit
+Don't display buckets in the output histogram whose counts are all zero.
+.SH AUTHOR
+.B oslat
+was written by Peter Xu <peterx@xxxxxxxxxx>.
diff --git a/src/oslat/oslat.c b/src/oslat/oslat.c
new file mode 100644
index 0000000..d796919
--- /dev/null
+++ b/src/oslat/oslat.c
@@ -0,0 +1,896 @@
+/*
+ * oslat - OS latency detector
+ *
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Authors: Peter Xu <peterx@xxxxxxxxxx>
+ *
+ * Some of the utility code is based on sysjitter-1.3:
+ * Copyright 2010-2015 David Riddoch <david@xxxxxxxxxxxxxx>
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of version 3 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <numa.h>
+#include <math.h>
+#include <limits.h>
+#include <linux/unistd.h>
+
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/utsname.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include "rt-utils.h"
+#include "error.h"
+
+#ifdef __GNUC__
+# define atomic_inc(ptr)   __sync_add_and_fetch((ptr), 1)
+# if defined(__x86_64__)
+# define relax()           __asm__ __volatile__("pause" ::: "memory")
+static inline void frc(uint64_t* pval)
+{
+	uint32_t low, high;
+	/* See rdtsc_ordered() of Linux */
+	__asm__ __volatile__("lfence");
+	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
+	*pval = ((uint64_t) high << 32) | low;
+}
+# elif defined(__i386__)
+# define relax()           __asm__ __volatile__("pause" ::: "memory")
+static inline void frc(uint64_t* pval)
+{
+	__asm__ __volatile__("rdtsc" : "=A" (*pval));
+}
+# elif defined(__PPC64__)
+# define relax()           do { } while (0)
+static inline void frc(uint64_t* pval)
+{
+	__asm__ __volatile__("mfspr %0, 268\n" : "=r" (*pval));
+}
+# else
+# error Need frc() for this platform.
+# endif
+#else
+# error Need to add support for this compiler.
+#endif + +typedef uint64_t stamp_t; /* timestamp */ +typedef uint64_t cycles_t; /* number of cycles */ +typedef unsigned char bool; + +#define true 1 +#define false 0 + +enum command { + WAIT, + GO, + STOP +}; + +enum workload_type { + WORKLOAD_NONE = 0, + WORKLOAD_MEMMOVE, + WORKLOAD_NUM, +}; + +/* This workload needs pre-allocated memory */ +#define WORK_NEED_MEM (1UL << 0) + +typedef void (*workload_fn)(char *src, char *dst, size_t size); + +struct workload { + const char *w_name; + uint64_t w_flags; + workload_fn w_fn; +}; + +/* We'll have buckets 1us, 2us, ..., (BUCKET_SIZE) us. */ +#define BUCKET_SIZE (32) + +/* Default size of the workloads per thread (in bytes, which is 16KB) */ +#define WORKLOAD_MEM_SIZE (16UL << 10) + +/* By default, no workload */ +#define WORKLOAD_DEFUALT WORKLOAD_NONE + +struct thread { + int core_i; + pthread_t thread_id; + + /* NOTE! this is also how many ticks per us */ + unsigned cpu_mhz; + cycles_t int_total; + stamp_t frc_start; + stamp_t frc_stop; + cycles_t runtime; + stamp_t *buckets; + uint64_t minlat; + /* Maximum latency detected */ + uint64_t maxlat; + /* + * The extra part of the interruptions that cannot be put into even the + * biggest bucket. We'll use this to calculate a more accurate average at + * the end of the tests. + */ + uint64_t overflow_sum; + int memory_allocated; + + /* Buffers used for the workloads */ + char * src_buf; + char * dst_buf; + + /* These variables are calculated after the test */ + double average; +}; + +struct global { + /* Configuration. */ + unsigned runtime_secs; + /* Number of threads running for current test (either pre heat or real run) */ + unsigned n_threads; + /* Number of threads to test for the real run */ + unsigned n_threads_total; + struct timeval tv_start; + int rtprio; + int bucket_size; + int trace_threshold; + int runtime; + /* The core that we run the main thread. Default is cpu0 */ + int cpu_main_thread; + char * cpu_list; + char * app_name; + struct workload * workload; + uint64_t workload_mem_size; + int enable_bias; + uint64_t bias; + bool single_preheat_thread; + bool output_omit_zero_buckets; + + /* Mutable state. */ + volatile enum command cmd; + volatile unsigned n_threads_started; + volatile unsigned n_threads_ready; + volatile unsigned n_threads_running; + volatile unsigned n_threads_finished; +}; + +static struct global g; + +static void workload_nop(char *dst, char *src, size_t size) +{ + /* Nop */ +} + +static void workload_memmove(char *dst, char *src, size_t size) +{ + memmove(dst, src, size); +} + +struct workload workload_list[WORKLOAD_NUM] = { + { "no", 0, workload_nop }, + { "memmove", WORK_NEED_MEM, workload_memmove }, +}; + +#define TEST(x) \ + do { \ + if( ! 
(x) ) \ + test_fail(#x, __LINE__); \ + } while( 0 ) + +#define TEST0(x) TEST((x) == 0) + +static void test_fail(const char* what, int line) +{ + fprintf(stderr, "ERROR:\n"); + fprintf(stderr, "ERROR: TEST(%s)\n", what); + fprintf(stderr, "ERROR: at line %d\n", line); + fprintf(stderr, "ERROR: errno=%d (%s)\n", errno, strerror(errno)); + fprintf(stderr, "ERROR:\n"); + exit(1); +} + +static int move_to_core(int core_i) +{ + cpu_set_t cpus; + CPU_ZERO(&cpus); + CPU_SET(core_i, &cpus); + return sched_setaffinity(0, sizeof(cpus), &cpus); +} + +static cycles_t __measure_cpu_hz(void) +{ + struct timeval tvs, tve; + stamp_t s, e; + double sec; + + frc(&s); + e = s; + gettimeofday(&tvs, NULL); + while( e - s < 1000000 ) + frc(&e); + gettimeofday(&tve, NULL); + sec = tve.tv_sec - tvs.tv_sec + (tve.tv_usec - tvs.tv_usec) / 1e6; + return (cycles_t) ((e - s) / sec); +} + +static unsigned measure_cpu_mhz(void) +{ + cycles_t m, mprev, d; + + mprev = __measure_cpu_hz(); + do { + m = __measure_cpu_hz(); + if( m > mprev ) d = m - mprev; + else d = mprev - m; + mprev = m; + } while( d > m / 1000 ); + + return (unsigned) (m / 1000000); +} + +static void thread_init(struct thread* t) +{ + t->cpu_mhz = measure_cpu_mhz(); + t->maxlat = 0; + t->overflow_sum = 0; + t->minlat = (uint64_t)-1; + + /* NOTE: all the buffers are not freed until the process quits. */ + if (!t->memory_allocated) { + TEST(t->buckets = calloc(1, sizeof(t->buckets[0]) * g.bucket_size)); + if (g.workload->w_flags & WORK_NEED_MEM) { + TEST0(posix_memalign((void **)&t->src_buf, getpagesize(), + g.workload_mem_size)); + memset(t->src_buf, 0, g.workload_mem_size); + TEST0(posix_memalign((void **)&t->dst_buf, getpagesize(), + g.workload_mem_size)); + memset(t->dst_buf, 0, g.workload_mem_size); + } + t->memory_allocated = 1; + } else { + /* Clear the buckets */ + memset(t->buckets, 0, sizeof(t->buckets[0]) * g.bucket_size); + } +} + +static float cycles_to_sec(const struct thread* t, uint64_t cycles) +{ + return cycles / (t->cpu_mhz * 1e6); +} + +static void insert_bucket(struct thread *t, stamp_t value) +{ + int index, us; + uint64_t extra; + + index = value / t->cpu_mhz; + assert(index >= 0); + us = index + 1; + assert(us > 0); + + if (g.trace_threshold && us >= g.trace_threshold) { + char *line = "%s: Trace threshold (%d us) triggered with %u us! " + "Stopping the test.\n"; + tracemark(line, g.app_name, g.trace_threshold, us); + err_quit(line, g.app_name, g.trace_threshold, us); + } + + /* Update max latency */ + if (us > t->maxlat) { + t->maxlat = us; + } + + if (us < t->minlat) { + t->minlat = us; + } + + if (g.bias) { + /* t->bias will be set after pre-heat if user enabled it */ + us -= g.bias; + /* + * Negative should hardly happen, but if it happens, we assume we're in + * the smallest bucket, which is 1us. Same to index. 
+ */ + if (us <= 0) { + us = 1; + } + index -= g.bias; + if (index < 0) { + index = 0; + } + } + + /* Too big the jitter; put into the last bucket */ + if (index >= g.bucket_size) { + /* Keep the extra bit (in us) */ + extra = index - g.bucket_size; + if (t->overflow_sum + extra < t->overflow_sum) { + /* The uint64_t even overflowed itself; bail out */ + printf("Accumulated overflow too much!\n"); + exit(1); + } + t->overflow_sum += extra; + index = g.bucket_size - 1; + } + + t->buckets[index]++; + if (t->buckets[index] == 0) { + printf("Bucket %d overflowed\n", index); + exit(1); + } +} + +static void doit(struct thread* t) +{ + stamp_t ts1, ts2; + workload_fn workload_fn = g.workload->w_fn; + + frc(&ts2); + do { + workload_fn(t->dst_buf, t->src_buf, g.workload_mem_size); + frc(&ts1); + insert_bucket(t, ts1 - ts2); + ts2 = ts1; + } while (g.cmd == GO); +} + +static int set_fifo_prio(int prio) +{ + struct sched_param param; + + memset(¶m, 0, sizeof(param)); + param.sched_priority = prio; + return sched_setscheduler(0, SCHED_FIFO, ¶m); +} + +static void* thread_main(void* arg) +{ + /* Important thing to note here is that once we start bashing the CPU, we + * need to keep doing so to prevent the core from changing frequency or + * dropping into a low power state. + */ + struct thread* t = arg; + + /* Alloc memory in the thread itself after setting affinity to get the + * best chance of getting numa-local memory. Doesn't matter so much for + * the "struct thread" since we expect that to stay cache resident. + */ + TEST(move_to_core(t->core_i) == 0); + if (g.rtprio) + TEST(set_fifo_prio(g.rtprio) == 0); + + /* Don't bash the cpu until all threads have got going. */ + atomic_inc(&g.n_threads_started); + while( g.cmd == WAIT ) + usleep(1000); + + thread_init(t); + + /* Ensure we all start at the same time. */ + atomic_inc(&g.n_threads_running); + while( g.n_threads_running != g.n_threads ) + relax(); + + frc(&t->frc_start); + doit(t); + frc(&t->frc_stop); + + t->runtime = t->frc_stop - t->frc_start; + + /* Wait for everyone to finish so we don't disturb them by exiting and + * waking the main thread. + */ + atomic_inc(&g.n_threads_finished); + while( g.n_threads_finished != g.n_threads ) + relax(); + + return NULL; +} + +#define putfield(label, val, fmt, end) do { \ + printf("%12s:\t", label); \ + for (i = 0; i < g.n_threads; ++i) \ + printf(" %"fmt, val); \ + printf("%s\n", end); \ + } while (0) + +void calculate(struct thread *t) +{ + int i, j; + double sum; + uint64_t count; + + for (i = 0; i < g.n_threads; ++i) { + /* Calculate average */ + sum = count = 0; + for (j = 0; j < g.bucket_size; j++) { + sum += 1.0 * t[i].buckets[j] * (g.bias+j+1); + count += t[i].buckets[j]; + } + /* Add the extra amount of huge spikes in */ + sum += t->overflow_sum; + t[i].average = sum / count; + } +} + +static void write_summary(struct thread* t) +{ + int i, j, k, print_dotdotdot = 0; + char bucket_name[64]; + + calculate(t); + + putfield("Core", t[i].core_i, "d", ""); + putfield("CPU Freq", t[i].cpu_mhz, "u", " (Mhz)"); + + for (j = 0; j < g.bucket_size; j++) { + if (j < g.bucket_size-1 && g.output_omit_zero_buckets) { + for (k = 0; k < g.n_threads; k++) { + if (t[k].buckets[j] != 0) + break; + } + if (k == g.n_threads) { + print_dotdotdot = 1; + continue; + } + } + + if (print_dotdotdot) { + printf(" ...\n"); + print_dotdotdot = 0; + } + + snprintf(bucket_name, sizeof(bucket_name), "%03"PRIu64 + " (us)", g.bias+j+1); + putfield(bucket_name, t[i].buckets[j], PRIu64, + (j==g.bucket_size-1) ? 
" (including overflows)" : ""); + } + + putfield("Minimum", t[i].minlat, PRIu64, " (us)"); + putfield("Average", t[i].average, ".3lf", " (us)"); + putfield("Maximum", t[i].maxlat, PRIu64, " (us)"); + putfield("Max-Min", t[i].maxlat - t[i].minlat, PRIu64, " (us)"); + putfield("Duration", cycles_to_sec(&(t[i]), t[i].runtime), + ".3f", " (sec)"); + printf("\n"); +} + +static void run_expt(struct thread* threads, int runtime_secs) +{ + int i; + + g.runtime_secs = runtime_secs; + g.n_threads_started = 0; + g.n_threads_ready = 0; + g.n_threads_running = 0; + g.n_threads_finished = 0; + g.cmd = WAIT; + + for( i = 0; i < g.n_threads; ++i ) { + TEST0(pthread_create(&(threads[i].thread_id), NULL, + thread_main, &(threads[i]))); + } + while( g.n_threads_started != g.n_threads ) { + usleep(1000); + } + + gettimeofday(&g.tv_start, NULL); + g.cmd = GO; + + alarm(runtime_secs); + + /* Go to sleep until the threads have done their stuff. */ + for( i = 0; i < g.n_threads; ++i ) { + pthread_join(threads[i].thread_id, NULL); + } +} + +static void handle_alarm(int code) +{ + g.cmd = STOP; +} + +const char *helpmsg = + "Usage: %s [options]\n" + "\n" + "This is an OS latency detector by running busy loops on specified cores.\n" + "Please run this tool using root.\n" + "\n" + "Available options:\n" + "\n" + " -b, --bucket-size Specify the number of the buckets (4-1024)\n" + " -B, --bias Add a bias to all the buckets using the estimated mininum\n" + " -c, --cpu-list Specify CPUs to run on, e.g. '1,3,5,7-15'\n" + " -C, --cpu-main-thread Specify which CPU the main thread runs on. Default is cpu0.\n" + " -f, --rtprio Using SCHED_FIFO priority (1-99)\n" + " -m, --workload-mem Size of the memory to use for the workload (e.g., 4K, 1M).\n" + " Total memory usage will be this value multiplies 2*N,\n" + " because there will be src/dst buffers for each thread, and\n" + " N is the number of processors for testing.\n" + " -s, --single-preheat Use a single thread when measuring latency at preheat stage\n" + " NOTE: please make sure the CPU frequency on all testing cores\n" + " are locked before using this parmater. If you don't know how\n" + " to lock the freq then please don't use this parameter.\n" + " -t, --runtime Specify test duration, e.g., 60, 20m, 2H\n" + " (m/M: minutes, h/H: hours, d/D: days)\n" + " -T, --trace-threshold Stop the test when threshold triggered (in us),\n" + " print a marker in ftrace and stop ftrace too.\n" + " -v, --version Display the version of the software.\n" + " -w, --workload Specify a kind of workload, default is no workload\n" + " (options: no, memmove)\n" + " -z, --zero-omit Don't display buckets in the output histogram if all zeros.\n" + "\n" + ; + +static void usage(void) +{ + printf(helpmsg, g.app_name); + exit(1); +} + +/* TODO: use libnuma? 
*/ +static int parse_cpu_list(char *cpu_list, cpu_set_t *cpu_set) +{ + struct bitmask *cpu_mask; + int i, n_cores; + + n_cores = sysconf(_SC_NPROCESSORS_CONF); + + if (!cpu_list) { + for (i = 0; i < n_cores; i++) + CPU_SET(i, cpu_set); + return n_cores; + } + + cpu_mask = numa_parse_cpustring_all(cpu_list); + if (cpu_mask) { + for (i = 0; i < n_cores; i++) { + if (numa_bitmask_isbitset(cpu_mask, i)) { + CPU_SET(i, cpu_set); + } + } + numa_bitmask_free(cpu_mask); + } else { + warn("Unknown cpu-list: %s, using all available cpus\n", cpu_list); + for (i = 0; i < n_cores; i++) + CPU_SET(i, cpu_set); + } + + return n_cores; +} + +static int parse_runtime(const char *str) +{ + char *endptr; + int v = strtol(str, &endptr, 10); + + if (!*endptr) { + return v; + } + + switch (*endptr) { + case 'd': + case 'D': + /* Days */ + v *= 24; + case 'h': + case 'H': + /* Hours */ + v *= 60; + case 'm': + case 'M': + /* Minutes */ + v *= 60; + case 's': + case 'S': + /* Seconds */ + break; + default: + printf("Unknown runtime suffix: %s\n", endptr); + v = 0; + break; + } + + return v; +} + +static int parse_mem_size(char *str, uint64_t *val) +{ + char *endptr; + int v = strtol(str, &endptr, 10); + + if (!*endptr) { + return v; + } + + switch (*endptr) { + case 'g': + case 'G': + v *= 1024; + case 'm': + case 'M': + v *= 1024; + case 'k': + case 'K': + v *= 1024; + case 'b': + case 'B': + break; + default: + return -1; + } + + *val = v; + + return 0; +} + +static int workload_select(char *name) +{ + int i = 0; + + for (i = 0; i < WORKLOAD_NUM; i++) { + if (!strcmp(name, workload_list[i].w_name)) { + g.workload = &workload_list[i]; + return 0; + } + } + + return -1; +} + +/* Process commandline options */ +static void parse_options(int argc, char *argv[]) +{ + while (1) { + static struct option options[] = { + { "bucket-size", required_argument, NULL, 'b' }, + { "cpu-list", required_argument, NULL, 'c' }, + { "cpu-main-thread", required_argument, NULL, 'C'}, + { "runtime", required_argument, NULL, 't' }, + { "rtprio", required_argument, NULL, 'f' }, + { "help", no_argument, NULL, 'h' }, + { "trace-threshold", required_argument, NULL, 'T' }, + { "workload", required_argument, NULL, 'w'}, + { "workload-mem", required_argument, NULL, 'm'}, + { "bias", no_argument, NULL, 'B'}, + { "single-preheat", no_argument, NULL, 's'}, + { "zero-omit", no_argument, NULL, 'u'}, + { "version", no_argument, NULL, 'v'}, + { NULL, 0, NULL, 0 }, + }; + int i, c = getopt_long(argc, argv, "b:Bc:C:f:hm:st:w:T:vz", + options, NULL); + long ncores; + + if (c == -1) + break; + + switch (c) { + case 'b': + g.bucket_size = strtol(optarg, NULL, 10); + if (g.bucket_size > 1024 || g.bucket_size <= 4) { + printf("Illegal bucket size: %s (should be: 4-1024)\n", + optarg); + exit(1); + } + break; + case 'B': + g.enable_bias = 1; + break; + case 'c': + g.cpu_list = strdup(optarg); + break; + case 'C': + ncores = sysconf(_SC_NPROCESSORS_CONF); + g.cpu_main_thread = strtol(optarg, NULL, 10); + if (g.cpu_main_thread < 0 || g.cpu_main_thread > ncores) { + printf("Illegal core for main thread: %s (should be: 0-%ld)\n", + optarg, ncores); + exit(1); + } + break; + case 't': + g.runtime = parse_runtime(optarg); + if (!g.runtime) { + printf("Illegal runtime: %s\n", optarg); + exit(1); + } + break; + case 'f': + g.rtprio = strtol(optarg, NULL, 10); + if (g.rtprio < 1 || g.rtprio > 99) { + printf("Illegal RT priority: %s (should be: 1-99)\n", optarg); + exit(1); + } + break; + case 'T': + g.trace_threshold = strtol(optarg, NULL, 10); + if (g.trace_threshold 
<= 0) { + printf("Parameter --trace-threshold needs to be positive\n"); + exit(1); + } + enable_trace_mark(); + break; + case 'w': + if (workload_select(optarg)) { + printf("Unknown workload '%s'. Please choose from: ", optarg); + for (i = 0; i < WORKLOAD_NUM; i++) { + printf("'%s'", workload_list[i].w_name); + if (i != WORKLOAD_NUM - 1) { + printf(", "); + } + } + printf("\n\n"); + exit(1); + } + break; + case 'm': + if (parse_mem_size(optarg, &g.workload_mem_size)) { + printf("Unknown workload memory size '%s'.\n\n", optarg); + exit(1); + } + break; + case 's': + /* + * Only use one core for pre-heat. Then if --bias is used, the + * bias will be exactly the min value of the pre-heat core. + */ + g.single_preheat_thread = true; + break; + case 'v': + /* + * Because we always dump the version even before parsing options, + * what we need to do is to quit.. + */ + exit(0); + break; + case 'z': + g.output_omit_zero_buckets = 1; + break; + default: + usage(); + break; + } + } +} + +void dump_globals(void) +{ + printf("Total runtime: \t\t%d seconds\n", g.runtime); + printf("Thread priority: \t"); + if (g.rtprio) { + printf("SCHED_FIFO:%d\n", g.rtprio); + } else { + printf("default\n"); + } + printf("CPU list: \t\t%s\n", g.cpu_list ?: "(all cores)"); + printf("CPU for main thread: \t%d\n", g.cpu_main_thread); + printf("Workload: \t\t%s\n", g.workload->w_name); + printf("Workload mem: \t\t%"PRIu64" (KiB)\n", + (g.workload->w_flags & WORK_NEED_MEM) ? + (g.workload_mem_size / 1024) : 0); + printf("Preheat cores: \t\t%d\n", g.single_preheat_thread ? + 1 : g.n_threads_total); + printf("\n"); +} + +static void record_bias(struct thread *t) +{ + int i; + uint64_t bias = (uint64_t)-1; + + if (!g.enable_bias) { + return; + } + + /* Record the min value of minlat on all the threads */ + for( i = 0; i < g.n_threads; ++i ) { + if (t[i].minlat < bias) { + bias = t[i].minlat; + } + } + g.bias = bias; + printf("Global bias set to %" PRId64 " (us)\n", bias); +} + +int main(int argc, char* argv[]) +{ + struct thread* threads; + int i, n_cores; + cpu_set_t cpu_set; + + CPU_ZERO(&cpu_set); + + g.app_name = argv[0]; + g.rtprio = 0; + g.bucket_size = BUCKET_SIZE; + g.runtime = 1; + g.workload = &workload_list[WORKLOAD_DEFUALT]; + g.workload_mem_size = WORKLOAD_MEM_SIZE; + /* Run the main thread on cpu0 by default */ + g.cpu_main_thread = 0; + + printf("\nVersion: %1.2f\n\n", VERSION); + + parse_options(argc, argv); + + TEST(mlockall(MCL_CURRENT | MCL_FUTURE) == 0); + + n_cores = parse_cpu_list(g.cpu_list, &cpu_set); + + TEST( threads = calloc(1, CPU_COUNT(&cpu_set) * sizeof(threads[0])) ); + for( i = 0; i < n_cores; ++i ) + if (CPU_ISSET(i, &cpu_set) && move_to_core(i) == 0) + threads[g.n_threads_total++].core_i = i; + + if (CPU_ISSET(0, &cpu_set) && g.rtprio) { + printf("WARNING: Running SCHED_FIFO workload on CPU 0 " + "may hang the main thread\n"); + } + + TEST(move_to_core(g.cpu_main_thread) == 0); + + signal(SIGALRM, handle_alarm); + signal(SIGINT, handle_alarm); + signal(SIGTERM, handle_alarm); + + dump_globals(); + + printf("Pre-heat for 1 seconds...\n"); + if (g.single_preheat_thread) { + g.n_threads = 1; + } else { + g.n_threads = g.n_threads_total; + } + run_expt(threads, 1); + record_bias(threads); + + printf("Test starts...\n"); + /* Reset n_threads to always run on all the cores */ + g.n_threads = g.n_threads_total; + run_expt(threads, g.runtime); + + printf("Test completed.\n\n"); + + write_summary(threads); + + if (g.cpu_list) { + free(g.cpu_list); + g.cpu_list = NULL; + } + + return 0; +} -- 2.26.2
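A quick usage note for anyone trying the tool out: after building with
"make oslat", an invocation along the lines below exercises most of the
options (the CPU list, runtime and trace threshold are illustrative only and
assume a box where CPUs 1-4 are isolated; the tool needs to run as root):

    # oslat -c 1-4 -f 1 -t 10m -T 50 -z

This runs the polling threads on CPUs 1-4 at SCHED_FIFO priority 1 for ten
minutes, keeps the main thread on the default CPU 0, stops the run and writes
an ftrace marker if a latency above 50 us is observed, and omits all-zero
buckets from the reported histogram.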