On Wed, Mar 23, 2022 at 7:57 PM Steven Rostedt <rostedt@xxxxxxxxxxx> wrote: > > From: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx> > > Add a new trace-cmd command "analyze" that will look at a trace.dat file > and report various statistics about the code. > > Start off by showing the amount each CPU is idle. > > Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx> > --- > tracecmd/Makefile | 1 + > tracecmd/include/trace-local.h | 2 + > tracecmd/trace-analyze.c | 508 +++++++++++++++++++++++++++++++++ > tracecmd/trace-cmd.c | 1 + > 4 files changed, 512 insertions(+) > create mode 100644 tracecmd/trace-analyze.c > > diff --git a/tracecmd/Makefile b/tracecmd/Makefile > index 355f04723ad7..895294d09092 100644 > --- a/tracecmd/Makefile > +++ b/tracecmd/Makefile > @@ -37,6 +37,7 @@ TRACE_CMD_OBJS += trace-dump.o > TRACE_CMD_OBJS += trace-clear.o > TRACE_CMD_OBJS += trace-vm.o > TRACE_CMD_OBJS += trace-convert.o > +TRACE_CMD_OBJS += trace-analyze.o > > ifeq ($(VSOCK_DEFINED), 1) > TRACE_CMD_OBJS += trace-agent.o > diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h > index bb33de06bb58..f9d0745914fe 100644 > --- a/tracecmd/include/trace-local.h > +++ b/tracecmd/include/trace-local.h > @@ -112,6 +112,8 @@ void trace_dump(int argc, char **argv); > > void trace_convert(int argc, char **argv); > > +void trace_analyze(int argc, char **argv); > + > int trace_record_agent(struct tracecmd_msg_handle *msg_handle, > int cpus, int *fds, > int argc, char **argv, bool use_fifos, > diff --git a/tracecmd/trace-analyze.c b/tracecmd/trace-analyze.c > new file mode 100644 > index 000000000000..4a17cf12eb07 > --- /dev/null > +++ b/tracecmd/trace-analyze.c > @@ -0,0 +1,508 @@ > +// SPDX-License-Identifier: LGPL-2.1 > +/* > + * Copyright (C) 2022 Google Inc, Steven Rostedt <srostedt@xxxxxxxxxx> > + */ > +#define _LARGEFILE64_SOURCE > +#include <getopt.h> > +#include <stdlib.h> > + > +#include "trace-cmd-local.h" > +#include "trace-local.h" > +#include "trace-hash.h" > +#include "list.h" > + > +static bool time_in_nsecs; > + > +struct analysis_data; > + > +struct cpu_data { > + unsigned long long start_ts; > + unsigned long long last_ts; > + struct analysis_data *data; > + struct trace_hash tasks; > + int nr_tasks; > + int cpu; > + int current_pid; > + int first_pid; > +}; > + > +struct analysis_data { > + unsigned long long start_ts; > + unsigned long long last_ts; > + struct cpu_data *cpu_data; > + struct trace_hash tasks; > + int nr_tasks; > + int allocated_cpus; > + int cpus; Hi Steve, I don't see analysis_data->cpus field ever used. > +}; > + > +struct task_item { > + unsigned long long runtime; > + unsigned long long start_ts; > + struct trace_hash_item hash; > + int pid; > +}; > + > +struct task_cpu_item { > + struct trace_hash_item hash; > + struct task_item *task; > +}; > + > +#define CPU_BLOCKS 32 > + > +#define task_from_hash(item) container_of(item, struct task_item, hash) > +#define task_cpu_from_hash(item) container_of(item, struct task_cpu_item, hash) > + > +static struct cpu_data *get_cpu_data(struct analysis_data *data, > + struct tep_record *record) > +{ > + struct cpu_data *cpu_data; > + int cpu = record->cpu; > + int cnt; > + > + if (cpu < data->allocated_cpus) { > + cpu_data = &data->cpu_data[cpu]; > + if (!cpu_data->data) > + goto init_cpu_data; > + return cpu_data; > + } > + > + /* Round up to CPU_BLOCKS multiplier */ > + cnt = ((cpu + CPU_BLOCKS) / CPU_BLOCKS) * CPU_BLOCKS; > + > + cpu_data = realloc(data->cpu_data, sizeof(*cpu_data) * cnt); > + > + if (!cpu_data) > + die("Allocating cpu size %d for cpu %d", cnt, cpu); > + > + memset(cpu_data + data->allocated_cpus, 0, > + sizeof(*cpu_data) * (cnt - data->allocated_cpus)); > + > + data->allocated_cpus = cnt; > + > + data->cpu_data = cpu_data; > + cpu_data += cpu; > + > + init_cpu_data: > + cpu_data->current_pid = -1; > + cpu_data->cpu = cpu; > + cpu_data->data = data; > + > + trace_hash_init(&cpu_data->tasks, 32); nit: #define CPU_TASKS_HASH_SIZE 32? > + > + return cpu_data; > +} > + > +static int check_idle(struct cpu_data *cpu_data, int pid) > +{ > + if (pid) > + return pid; > + > + /* > + * Since pid 0 is the per cpu swapper task that > + * means several of these tasks have the same pid > + * and only differentiate between CPUs. Set the pid > + * that is stored in the hash as -2 - CPU id. > + */ > + > + return -2 - cpu_data->cpu; > +} > + > +static struct task_item *get_task(struct cpu_data *cpu_data, int pid) > +{ > + struct trace_hash_item *hash; > + struct task_item *task; > + int find_pid; > + > + find_pid = check_idle(cpu_data, pid); > + > + hash = trace_hash_find(&cpu_data->data->tasks, find_pid, NULL, NULL); > + if (!hash) { > + task = calloc(sizeof(*task), 1); > + if (!task) > + die("allocating task"); > + task->pid = find_pid; > + hash = &task->hash; > + hash->key = find_pid; > + cpu_data->data->nr_tasks++; > + trace_hash_add(&cpu_data->data->tasks, hash); > + } > + > + return task_from_hash(hash); > +} > + > +static struct task_cpu_item *get_cpu_task(struct cpu_data *cpu_data, int pid) > +{ > + struct trace_hash_item *hash; > + struct task_cpu_item *task; > + > + hash = trace_hash_find(&cpu_data->tasks, pid, NULL, NULL); > + if (!hash) { > + task = calloc(sizeof(*task), 1); > + if (!task) > + die("allocating cpu task"); > + task->task = get_task(cpu_data, pid); > + hash = &task->hash; > + hash->key = pid; > + cpu_data->nr_tasks++; > + trace_hash_add(&cpu_data->tasks, hash); > + } > + > + return task_cpu_from_hash(hash); > +}; > + > +/* Update times for a task scheduling off the CPU */ > +static void update_cpu_task_times(struct cpu_data *cpu_data, > + struct task_cpu_item *cpu_task, > + unsigned long long ts) > +{ > + unsigned long long delta; > + struct task_item *task = cpu_task->task; > + > + /* > + * If the last event was a sched switch where the previous task > + * ran on another CPU, and migrated back to this CPU, and sched > + * switch was not recorded (saying that this task scheduled off) > + * It could be miscronstrued to still be on this CPU, and that > + * its start_ts is later than the last_ts of this CPU. > + */ > + if (ts < task->start_ts) > + return; > + > + delta = ts - task->start_ts; > + task->runtime += delta; > +} > + > +static void update_pid(struct cpu_data *cpu_data, > + struct tep_record *record, int pid) > +{ > + struct task_cpu_item *cpu_task; > + struct task_item *task; > + int curr_pid = cpu_data->current_pid; > + unsigned long long delta; > + > + cpu_task = get_cpu_task(cpu_data, curr_pid); > + task = cpu_task->task; > + > + update_cpu_task_times(cpu_data, cpu_task, cpu_data->last_ts); > + > + if (!record) > + return; > + > + cpu_task = get_cpu_task(cpu_data, pid); > + task = cpu_task->task; > + > + if (record->ts < cpu_data->last_ts) { > + tracecmd_warning("task %d start time %llu greater than CPU time %llu", > + pid, record->ts, cpu_data->last_ts); Shouldn’t this say, ... start time %llu "less than" CPU time ...? > + } else { > + delta = record->ts - cpu_data->last_ts; > + task->runtime += delta; > + } > + > + cpu_data->last_ts = record->ts; > + > + task->start_ts = cpu_data->last_ts; > + cpu_data->current_pid = pid; > +} > + > +static void update_cpu_times(struct cpu_data *cpu_data, > + struct tep_handle *tep, int pid, > + struct tep_record *record) > +{ > + struct task_cpu_item *cpu_task; > + struct task_item *task; > + > + if (cpu_data->current_pid < 0) { > + /* First time called */ > + cpu_data->start_ts = record->ts; > + cpu_data->last_ts = record->ts; > + cpu_data->current_pid = pid; > + cpu_data->first_pid = pid; > + cpu_task = get_cpu_task(cpu_data, pid); > + task = cpu_task->task; > + task->start_ts = record->ts; > + if (record->ts < cpu_data->data->start_ts) > + cpu_data->data->start_ts = record->ts; > + return; > + } > + > + if (pid != cpu_data->current_pid) { > + update_pid(cpu_data, record, pid); > + return; > + } > + > + cpu_data->last_ts = record->ts; > +} > + > +static void update_first_pid(struct cpu_data *cpu_data) > +{ > + struct task_cpu_item *cpu_task; > + struct task_item *task; > + unsigned long long start_ts = cpu_data->data->start_ts; > + unsigned long long delta; > + > + cpu_task = get_cpu_task(cpu_data, cpu_data->first_pid); > + task = cpu_task->task; > + delta = cpu_data->start_ts - start_ts; > + task->runtime += delta; > + cpu_data->start_ts = start_ts; > +} > + > +static void process_cpu(struct analysis_data *data, > + struct tep_handle *tep, > + struct tep_record *record) > +{ > + struct cpu_data *cpu_data; > + int pid; > + > + pid = tep_data_pid(tep, record); > + if (pid < 0) /* Warn? */ > + return; > + > + cpu_data = get_cpu_data(data, record); > + update_cpu_times(cpu_data, tep, pid, record); > +} > + > +static void print_time(unsigned long long ts, char delim) > +{ > + unsigned long long secs; > + unsigned long long msecs; > + unsigned long long usecs; > + unsigned long long nsecs; > + > + secs = ts / 1000000000; > + ts -= secs * 1000000000; > + > + msecs = ts / 1000000; > + ts -= msecs * 1000000; > + > + usecs = ts / 1000; > + ts -= usecs * 1000; > + > + nsecs = ts; > + > + if (delim) { > + printf("%6llu.%03llu%c%03llu", > + secs, msecs, delim, usecs); > + if (time_in_nsecs) > + printf("%c%03llu", delim, nsecs); > + } else { > + printf("%6llu.%03llu%03llu", > + secs, msecs, usecs); > + if (time_in_nsecs) > + printf("%03llu", nsecs); > + } > +} > + > +static void print_total(struct tep_handle *tep, struct analysis_data *data) > +{ > + unsigned long long total_time; > + struct trace_hash_item **bucket; > + struct trace_hash_item *item; > + struct task_item **idle_tasks; > + struct task_item *task; > + bool first = true; > + int cpu; > + int i = 0; > + > + total_time = data->last_ts - data->start_ts; > + printf("\nTotal time: "); > + print_time(total_time, '_'); > + printf("\n"); > + > + idle_tasks = calloc(sizeof(*idle_tasks), data->allocated_cpus); > + if (!idle_tasks) > + die("Could not allocate idle task array"); > + > + trace_hash_for_each_bucket(bucket, &data->tasks) { > + trace_hash_for_each_item(item, bucket) { > + task = task_from_hash(item); > + if (task->pid < 0) { > + cpu = -2 - task->pid; > + idle_tasks[cpu] = task; > + } > + } > + } > + > + for (i = 0; i < data->allocated_cpus; i++) { > + if (!idle_tasks[i]) > + continue; > + > + if (first) { > + printf("\n Idle CPU\t Run time\n"); > + printf(" --------\t --------\n"); > + first = false; > + } > + printf("CPU %d idle:\t", i); > + print_time(idle_tasks[i]->runtime, '_'); > + printf(" (%%%lld)\n", (idle_tasks[i]->runtime * 100) / total_time); > + } > + free(idle_tasks); > +} > + > +static void free_tasks(struct trace_hash *hash) > +{ > + struct trace_hash_item **bucket; > + struct trace_hash_item *item; > + struct task_item *task; > + > + trace_hash_for_each_bucket(bucket, hash) { > + trace_hash_while_item(item, bucket) { > + task = task_from_hash(item); > + trace_hash_del(item); > + free(task); > + } > + } > + trace_hash_free(hash); > +} > + > +static void free_cpu_tasks(struct trace_hash *hash) > +{ > + struct trace_hash_item **bucket; > + struct trace_hash_item *item; > + struct task_cpu_item *cpu_task; > + > + trace_hash_for_each_bucket(bucket, hash) { > + trace_hash_while_item(item, bucket) { > + cpu_task = task_cpu_from_hash(item); > + trace_hash_del(item); > + free(cpu_task); > + } > + } > + trace_hash_free(hash); > +} > + > +static void free_cpus(struct analysis_data *data) > +{ > + struct cpu_data *cpu_data; > + int i; > + > + for (i = 0; i < data->allocated_cpus; i++) { > + cpu_data = &data->cpu_data[i]; > + if (!cpu_data->data) > + continue; > + free_cpu_tasks(&cpu_data->tasks); > + } > + free(data->cpu_data); > +} > + > +static void do_trace_analyze(struct tracecmd_input *handle) > +{ > + struct tep_handle *tep = tracecmd_get_tep(handle); > + struct tep_record *record; > + struct analysis_data data; > + struct cpu_data *cpu_data; > + int i; > + > + memset(&data, 0, sizeof(data)); > + > + trace_hash_init(&data.tasks, 128); > + > + /* Set to a very large number */ > + data.start_ts = -1ULL; > + > + do { > + record = tracecmd_read_next_data(handle, NULL); > + if (record) > + process_cpu(&data, tep, record); > + tracecmd_free_record(record); > + } while (record); > + > + for (i = 0; i < data.allocated_cpus; i++) { > + cpu_data = &data.cpu_data[i]; > + if (!cpu_data->data || !cpu_data->nr_tasks) > + continue; > + if (cpu_data->last_ts > data.last_ts) > + data.last_ts = cpu_data->last_ts; > + } > + > + for (i = 0; i < data.allocated_cpus; i++) { > + cpu_data = &data.cpu_data[i]; > + if (!cpu_data->data || !cpu_data->nr_tasks) > + continue; > + cpu_data->last_ts = data.last_ts; > + update_pid(cpu_data, NULL, -1); I think having update_last_pid() makes this easier to read. Thanks, Kalesh > + update_first_pid(cpu_data); > + } > + > + print_total(tep, &data); > + > + free_cpus(&data); > + free_tasks(&data.tasks); > +} > + > +void trace_analyze(int argc, char **argv) > +{ > + struct tracecmd_input *handle; > + const char *input_file = NULL; > + int instances; > + int ret; > + > + for (;;) { > + int c; > + > + c = getopt(argc-1, argv+1, "+hti:"); > + if (c == -1) > + break; > + switch (c) { > + case 'h': > + usage(argv); > + break; > + case 'i': > + if (input_file) > + die("Only one input for historgram"); > + input_file = optarg; > + break; > + case 't': > + time_in_nsecs = true; > + break; > + default: > + usage(argv); > + } > + } > + > + if ((argc - optind) >= 2) { > + if (input_file) > + usage(argv); > + input_file = argv[optind + 1]; > + } > + > + if (!input_file) > + input_file = DEFAULT_INPUT_FILE; > + > + handle = tracecmd_alloc(input_file, 0); > + if (!handle) > + die("can't open %s\n", input_file); > + > + ret = tracecmd_read_headers(handle, 0); > + if (ret) > + return; > + > + ret = tracecmd_init_data(handle); > + if (ret < 0) > + die("failed to init data"); > + > + if (ret > 0) > + die("trace-cmd hist does not work with latency traces\n"); > + > + do_trace_analyze(handle); > + > + instances = tracecmd_buffer_instances(handle); > + if (instances) { > + struct tracecmd_input *new_handle; > + int i; > + > + for (i = 0; i < instances; i++) { > + new_handle = tracecmd_buffer_instance_handle(handle, i); > + if (!new_handle) { > + warning("could not retrieve handle %d", i); > + continue; > + } > + do_trace_analyze(new_handle); > + tracecmd_close(new_handle); > + } > + } > + > + tracecmd_close(handle); > +} > diff --git a/tracecmd/trace-cmd.c b/tracecmd/trace-cmd.c > index a83a8d0bb9e4..d6e1f9dacc3c 100644 > --- a/tracecmd/trace-cmd.c > +++ b/tracecmd/trace-cmd.c > @@ -134,6 +134,7 @@ struct command commands[] = { > {"help", trace_usage}, > {"dump", trace_dump}, > {"convert", trace_convert}, > + {"analyze", trace_analyze}, > {"-h", trace_usage}, > }; > > -- > 2.35.1 >