Re: [PATCH 01/12] trace-cmd: Add trace-cmd analyze

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 28 Mar 2022 09:08:02 -0700
Kalesh Singh <kaleshsingh@xxxxxxxxxx> wrote:

> > +++ b/tracecmd/trace-analyze.c
> > @@ -0,0 +1,508 @@
> > +// SPDX-License-Identifier: LGPL-2.1
> > +/*
> > + * Copyright (C) 2022 Google Inc, Steven Rostedt <srostedt@xxxxxxxxxx>
> > + */
> > +#define _LARGEFILE64_SOURCE
> > +#include <getopt.h>
> > +#include <stdlib.h>
> > +
> > +#include "trace-cmd-local.h"
> > +#include "trace-local.h"
> > +#include "trace-hash.h"
> > +#include "list.h"
> > +
> > +static bool time_in_nsecs;
> > +
> > +struct analysis_data;
> > +
> > +struct cpu_data {
> > +       unsigned long long      start_ts;
> > +       unsigned long long      last_ts;
> > +       struct analysis_data    *data;
> > +       struct trace_hash       tasks;
> > +       int                     nr_tasks;
> > +       int                     cpu;
> > +       int                     current_pid;
> > +       int                     first_pid;
> > +};
> > +
> > +struct analysis_data {
> > +       unsigned long long      start_ts;
> > +       unsigned long long      last_ts;
> > +       struct cpu_data         *cpu_data;
> > +       struct trace_hash       tasks;
> > +       int                     nr_tasks;
> > +       int                     allocated_cpus;
> > +       int                     cpus;  
> 
> Hi Steve,
> 
> I don't see analysis_data->cpus field ever used.

Probably not. I was debating about adding RFC to this series, but decided
not to. Now I regret that, because it really is an RFC (and I rushed
a lot of it).

> 
> > +};
> > +
> > +struct task_item {
> > +       unsigned long long      runtime;
> > +       unsigned long long      start_ts;
> > +       struct trace_hash_item  hash;
> > +       int                     pid;
> > +};
> > +
> > +struct task_cpu_item {
> > +       struct trace_hash_item  hash;
> > +       struct task_item        *task;
> > +};
> > +
> > +#define CPU_BLOCKS     32
> > +
> > +#define task_from_hash(item) container_of(item, struct task_item, hash)
> > +#define task_cpu_from_hash(item)  container_of(item, struct task_cpu_item, hash)
> > +
> > +static struct cpu_data *get_cpu_data(struct analysis_data *data,
> > +                                    struct tep_record *record)
> > +{
> > +       struct cpu_data *cpu_data;
> > +       int cpu = record->cpu;
> > +       int cnt;
> > +
> > +       if (cpu < data->allocated_cpus) {
> > +               cpu_data = &data->cpu_data[cpu];
> > +               if (!cpu_data->data)
> > +                       goto init_cpu_data;
> > +               return cpu_data;
> > +       }
> > +
> > +       /* Round up to CPU_BLOCKS multiplier */
> > +       cnt = ((cpu + CPU_BLOCKS) / CPU_BLOCKS) * CPU_BLOCKS;
> > +
> > +       cpu_data = realloc(data->cpu_data, sizeof(*cpu_data) * cnt);
> > +
> > +       if (!cpu_data)
> > +               die("Allocating cpu size %d for cpu %d", cnt, cpu);
> > +
> > +       memset(cpu_data + data->allocated_cpus, 0,
> > +              sizeof(*cpu_data) * (cnt - data->allocated_cpus));
> > +
> > +       data->allocated_cpus = cnt;
> > +
> > +       data->cpu_data = cpu_data;
> > +       cpu_data += cpu;
> > +
> > + init_cpu_data:
> > +       cpu_data->current_pid = -1;
> > +       cpu_data->cpu = cpu;
> > +       cpu_data->data = data;
> > +
> > +       trace_hash_init(&cpu_data->tasks, 32);  
> 
> nit: #define CPU_TASKS_HASH_SIZE 32?

Agreed.

> 
> > +
> > +       return cpu_data;
> > +}
> > +
> > +static int check_idle(struct cpu_data *cpu_data, int pid)
> > +{
> > +       if (pid)
> > +               return pid;
> > +
> > +       /*
> > +        * Since pid 0 is the per cpu swapper task that
> > +        * means several of these tasks have the same pid
> > +        * and only differentiate between CPUs. Set the pid
> > +        * that is stored in the hash as -2 - CPU id.
> > +        */
> > +
> > +       return -2 - cpu_data->cpu;
> > +}
> > +
> > +static struct task_item *get_task(struct cpu_data *cpu_data, int pid)
> > +{
> > +       struct trace_hash_item *hash;
> > +       struct task_item *task;
> > +       int find_pid;
> > +
> > +       find_pid = check_idle(cpu_data, pid);
> > +
> > +       hash = trace_hash_find(&cpu_data->data->tasks, find_pid, NULL, NULL);
> > +       if (!hash) {
> > +               task = calloc(sizeof(*task), 1);
> > +               if (!task)
> > +                       die("allocating task");
> > +               task->pid = find_pid;
> > +               hash = &task->hash;
> > +               hash->key = find_pid;
> > +               cpu_data->data->nr_tasks++;
> > +               trace_hash_add(&cpu_data->data->tasks, hash);
> > +       }
> > +
> > +       return task_from_hash(hash);
> > +}
> > +
> > +static struct task_cpu_item *get_cpu_task(struct cpu_data *cpu_data, int pid)
> > +{
> > +       struct trace_hash_item *hash;
> > +       struct task_cpu_item *task;
> > +
> > +       hash = trace_hash_find(&cpu_data->tasks, pid, NULL, NULL);
> > +       if (!hash) {
> > +               task = calloc(sizeof(*task), 1);
> > +               if (!task)
> > +                       die("allocating cpu task");
> > +               task->task = get_task(cpu_data, pid);
> > +               hash = &task->hash;
> > +               hash->key = pid;
> > +               cpu_data->nr_tasks++;
> > +               trace_hash_add(&cpu_data->tasks, hash);
> > +       }
> > +
> > +       return task_cpu_from_hash(hash);
> > +};
> > +
> > +/* Update times for a task scheduling off the CPU */
> > +static void update_cpu_task_times(struct cpu_data *cpu_data,
> > +                                 struct task_cpu_item *cpu_task,
> > +                                 unsigned long long ts)
> > +{
> > +       unsigned long long delta;
> > +       struct task_item *task = cpu_task->task;
> > +
> > +       /*
> > +        * If the last event was a sched switch where the previous task
> > +        * ran on another CPU, and migrated back to this CPU, and sched
> > +        * switch was not recorded (saying that this task scheduled off)
> > +        * It could be misconstrued to still be on this CPU, and that
> > +        * its start_ts is later than the last_ts of this CPU.
> > +        */
> > +       if (ts < task->start_ts)
> > +               return;
> > +
> > +       delta = ts - task->start_ts;
> > +       task->runtime += delta;
> > +}
> > +
> > +static void update_pid(struct cpu_data *cpu_data,
> > +                      struct tep_record *record, int pid)
> > +{
> > +       struct task_cpu_item *cpu_task;
> > +       struct task_item *task;
> > +       int curr_pid = cpu_data->current_pid;
> > +       unsigned long long delta;
> > +
> > +       cpu_task = get_cpu_task(cpu_data, curr_pid);
> > +       task = cpu_task->task;
> > +
> > +       update_cpu_task_times(cpu_data, cpu_task, cpu_data->last_ts);
> > +
> > +       if (!record)
> > +               return;
> > +
> > +       cpu_task = get_cpu_task(cpu_data, pid);
> > +       task = cpu_task->task;
> > +
> > +       if (record->ts < cpu_data->last_ts) {
> > +               tracecmd_warning("task %d start time %llu greater than CPU time %llu",
> > +                                pid, record->ts, cpu_data->last_ts);  
> 
> Shouldn’t this say, ... start time %llu "less than" CPU time ...?

Yeah, that wording is confusing.

	"task %d CPU start time %llu is before last event %llu on CPU for a different task"

?

> 
> > +       } else {
> > +               delta = record->ts - cpu_data->last_ts;
> > +               task->runtime += delta;
> > +       }
> > +
> > +       cpu_data->last_ts = record->ts;
> > +
> > +       task->start_ts = cpu_data->last_ts;
> > +       cpu_data->current_pid = pid;
> > +}
> > +
> > +static void update_cpu_times(struct cpu_data *cpu_data,
> > +                            struct tep_handle *tep, int pid,
> > +                            struct tep_record *record)
> > +{
> > +       struct task_cpu_item *cpu_task;
> > +       struct task_item *task;
> > +
> > +       if (cpu_data->current_pid < 0) {
> > +               /* First time called */
> > +               cpu_data->start_ts = record->ts;
> > +               cpu_data->last_ts = record->ts;
> > +               cpu_data->current_pid = pid;
> > +               cpu_data->first_pid = pid;
> > +               cpu_task = get_cpu_task(cpu_data, pid);
> > +               task = cpu_task->task;
> > +               task->start_ts = record->ts;
> > +               if (record->ts < cpu_data->data->start_ts)
> > +                       cpu_data->data->start_ts = record->ts;
> > +               return;
> > +       }
> > +
> > +       if (pid != cpu_data->current_pid) {
> > +               update_pid(cpu_data, record, pid);
> > +               return;
> > +       }
> > +
> > +       cpu_data->last_ts = record->ts;
> > +}
> > +
> > +static void update_first_pid(struct cpu_data *cpu_data)
> > +{
> > +       struct task_cpu_item *cpu_task;
> > +       struct task_item *task;
> > +       unsigned long long start_ts = cpu_data->data->start_ts;
> > +       unsigned long long delta;
> > +
> > +       cpu_task = get_cpu_task(cpu_data, cpu_data->first_pid);
> > +       task = cpu_task->task;
> > +       delta = cpu_data->start_ts - start_ts;
> > +       task->runtime += delta;
> > +       cpu_data->start_ts = start_ts;
> > +}
> > +
> > +static void process_cpu(struct analysis_data *data,
> > +                       struct tep_handle *tep,
> > +                       struct tep_record *record)
> > +{
> > +       struct cpu_data *cpu_data;
> > +       int pid;
> > +
> > +       pid = tep_data_pid(tep, record);
> > +       if (pid < 0) /* Warn? */
> > +               return;
> > +
> > +       cpu_data = get_cpu_data(data, record);
> > +       update_cpu_times(cpu_data, tep, pid, record);
> > +}
> > +
> > +static void print_time(unsigned long long ts, char delim)
> > +{
> > +       unsigned long long secs;
> > +       unsigned long long msecs;
> > +       unsigned long long usecs;
> > +       unsigned long long nsecs;
> > +
> > +       secs = ts / 1000000000;
> > +       ts -= secs * 1000000000;
> > +
> > +       msecs = ts / 1000000;
> > +       ts -= msecs * 1000000;
> > +
> > +       usecs = ts / 1000;
> > +       ts -= usecs * 1000;
> > +
> > +       nsecs = ts;
> > +
> > +       if (delim) {
> > +               printf("%6llu.%03llu%c%03llu",
> > +                      secs, msecs, delim, usecs);
> > +               if (time_in_nsecs)
> > +                       printf("%c%03llu", delim, nsecs);
> > +       } else {
> > +               printf("%6llu.%03llu%03llu",
> > +                      secs, msecs, usecs);
> > +               if (time_in_nsecs)
> > +                       printf("%03llu", nsecs);
> > +       }
> > +}
> > +
> > +static void print_total(struct tep_handle *tep, struct analysis_data *data)
> > +{
> > +       unsigned long long total_time;
> > +       struct trace_hash_item **bucket;
> > +       struct trace_hash_item *item;
> > +       struct task_item **idle_tasks;
> > +       struct task_item *task;
> > +       bool first = true;
> > +       int cpu;
> > +       int i = 0;
> > +
> > +       total_time = data->last_ts - data->start_ts;
> > +       printf("\nTotal time: ");
> > +       print_time(total_time, '_');
> > +       printf("\n");
> > +
> > +       idle_tasks = calloc(sizeof(*idle_tasks), data->allocated_cpus);
> > +       if (!idle_tasks)
> > +               die("Could not allocate idle task array");
> > +
> > +       trace_hash_for_each_bucket(bucket, &data->tasks) {
> > +               trace_hash_for_each_item(item, bucket) {
> > +                       task = task_from_hash(item);
> > +                       if (task->pid < 0) {
> > +                               cpu = -2 - task->pid;
> > +                               idle_tasks[cpu] = task;
> > +                       }
> > +               }
> > +       }
> > +
> > +       for (i = 0; i < data->allocated_cpus; i++) {
> > +               if (!idle_tasks[i])
> > +                       continue;
> > +
> > +               if (first) {
> > +                       printf("\n Idle CPU\t     Run time\n");
> > +                       printf(" --------\t     --------\n");
> > +                       first = false;
> > +               }
> > +               printf("CPU %d idle:\t", i);
> > +               print_time(idle_tasks[i]->runtime, '_');
> > +               printf(" (%%%lld)\n", (idle_tasks[i]->runtime * 100) / total_time);
> > +       }
> > +       free(idle_tasks);
> > +}
> > +
> > +static void free_tasks(struct trace_hash *hash)
> > +{
> > +       struct trace_hash_item **bucket;
> > +       struct trace_hash_item *item;
> > +       struct task_item *task;
> > +
> > +       trace_hash_for_each_bucket(bucket, hash) {
> > +               trace_hash_while_item(item, bucket) {
> > +                       task = task_from_hash(item);
> > +                       trace_hash_del(item);
> > +                       free(task);
> > +               }
> > +       }
> > +       trace_hash_free(hash);
> > +}
> > +
> > +static void free_cpu_tasks(struct trace_hash *hash)
> > +{
> > +       struct trace_hash_item **bucket;
> > +       struct trace_hash_item *item;
> > +       struct task_cpu_item *cpu_task;
> > +
> > +       trace_hash_for_each_bucket(bucket, hash) {
> > +               trace_hash_while_item(item, bucket) {
> > +                       cpu_task = task_cpu_from_hash(item);
> > +                       trace_hash_del(item);
> > +                       free(cpu_task);
> > +               }
> > +       }
> > +       trace_hash_free(hash);
> > +}
> > +
> > +static void free_cpus(struct analysis_data *data)
> > +{
> > +       struct cpu_data *cpu_data;
> > +       int i;
> > +
> > +       for (i = 0; i < data->allocated_cpus; i++) {
> > +               cpu_data = &data->cpu_data[i];
> > +               if (!cpu_data->data)
> > +                       continue;
> > +               free_cpu_tasks(&cpu_data->tasks);
> > +       }
> > +       free(data->cpu_data);
> > +}
> > +
> > +static void do_trace_analyze(struct tracecmd_input *handle)
> > +{
> > +       struct tep_handle *tep = tracecmd_get_tep(handle);
> > +       struct tep_record *record;
> > +       struct analysis_data data;
> > +       struct cpu_data *cpu_data;
> > +       int i;
> > +
> > +       memset(&data, 0, sizeof(data));
> > +
> > +       trace_hash_init(&data.tasks, 128);
> > +
> > +       /* Set to a very large number */
> > +       data.start_ts = -1ULL;
> > +
> > +       do {
> > +               record = tracecmd_read_next_data(handle, NULL);
> > +               if (record)
> > +                       process_cpu(&data, tep, record);
> > +               tracecmd_free_record(record);
> > +       } while (record);
> > +
> > +       for (i = 0; i < data.allocated_cpus; i++) {
> > +               cpu_data = &data.cpu_data[i];
> > +               if (!cpu_data->data || !cpu_data->nr_tasks)
> > +                       continue;
> > +               if (cpu_data->last_ts > data.last_ts)
> > +                       data.last_ts = cpu_data->last_ts;
> > +       }
> > +
> > +       for (i = 0; i < data.allocated_cpus; i++) {
> > +               cpu_data = &data.cpu_data[i];
> > +               if (!cpu_data->data || !cpu_data->nr_tasks)
> > +                       continue;
> > +               cpu_data->last_ts = data.last_ts;
> > +               update_pid(cpu_data, NULL, -1);  
> 
> I think having update_last_pid() makes this easier to read.

You mean to add a helper function that handles the last pid, and have
update_pid() start with that?

I could do that.

Thanks for looking at this.

Note, I'm going to rewrite this so that this is actually part of the
library, to allow other applications to have access to this functionality.

-- Steve



[Index of Archives]     [RT Stable]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]

  Powered by Linux