>From 17c8c11792d35c7a8ec50e09b32842177eba625e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" <rostedt@xxxxxxxxxxx> Date: Wed, 5 May 2021 08:04:57 -0400 Subject: [PATCH] trace-cmd: Find PID of host-guest task from tracing vsock connection Searching for the qemu task to find what task the CID is for is not a reliable method, as the qemu executable may be called different names on different systems, and qemu may not even be used. Instead, trace the sched_waking and kvm_exit events and do a vsock connection to the CID. By doing so, you can find the task that runs the guest. trace-cmd wakes up "vhost-<pid>" vhost-<pid> wakes up "CPUX/KVM" CPUX/KVM calls "kvm_exit" The CPUX/KVM is the task that runs the vCPU of the guest, but we still need the task group leader of this task to find the task that KVM has for timestamp offsets and multipliers. To do that, look at the proc file system for the PID of the CPUX/KVM and read its status file. This holds "Tgid: <pid>" where "<pid>" is the PID of the task that has the KVM information. Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx> --- Changes since v1: - Open code the tracefs_event_enable() functions as that comes in libtracefs 1.2 and we still want this only supporting 1.1. - Check for interrupts before searching the pid list, as that's the faster check. 
tracecmd/trace-vm.c | 264 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 263 insertions(+), 1 deletion(-)

diff --git a/tracecmd/trace-vm.c b/tracecmd/trace-vm.c
index c0333b67..d3a37eee 100644
--- a/tracecmd/trace-vm.c
+++ b/tracecmd/trace-vm.c
@@ -9,6 +9,7 @@
 #include <sys/types.h>
 #include <dirent.h>
 #include <limits.h>
+#include <unistd.h>
 
 #include "trace-local.h"
 #include "trace-msg.h"
@@ -87,6 +88,264 @@ static struct trace_guest *add_guest(unsigned int cid, const char *name)
 	return &guests[guests_len - 1];
 }
 
+/*
+ * Create the "vsock_find_pid" tracing instance, write "1" to the enable
+ * files of the sched_waking and kvm_exit events and turn tracing on.
+ *
+ * Returns the instance, or NULL if it could not be created.
+ */
+static struct tracefs_instance *start_trace_connect(void)
+{
+	struct tracefs_instance *open_instance;
+
+	open_instance = tracefs_instance_create("vsock_find_pid");
+	if (!open_instance)
+		return NULL;
+
+	tracefs_instance_file_write(open_instance, "events/sched/sched_waking/enable", "1");
+	tracefs_instance_file_write(open_instance, "events/kvm/kvm_exit/enable", "1");
+	tracefs_trace_on(open_instance);
+	return open_instance;
+}
+
+/* Singly linked list of PIDs */
+struct pids {
+	struct pids		*next;
+	int			pid;
+};
+
+/* State handed to callback() while the recorded events are read */
+struct trace_fields {
+	struct tep_event		*sched_waking;
+	struct tep_event		*kvm_exit;
+	struct tep_format_field		*common_pid;
+	struct tep_format_field		*sched_next;	/* "pid" field of sched_waking */
+	struct pids			*pids;		/* tasks in the wake up chain */
+	int				found_pid;	/* task that hit kvm_exit, else -1 */
+};
+
+/* Free every node of the @pids list */
+static void free_pids(struct pids *pids)
+{
+	struct pids *next;
+
+	while (pids) {
+		next = pids;
+		pids = pids->next;
+		free(next);
+	}
+}
+
+/*
+ * Push @pid on the front of the list at @pids. An allocation failure
+ * is silently ignored and leaves the list unchanged.
+ */
+static void add_pid(struct pids **pids, int pid)
+{
+	struct pids *new_pid;
+
+	new_pid = malloc(sizeof(*new_pid));
+	if (!new_pid)
+		return;
+
+	new_pid->pid = pid;
+	new_pid->next = *pids;
+	*pids = new_pid;
+}
+
+/* Return true if @pid is in the @pids list */
+static bool match_pid(struct pids *pids, int pid)
+{
+	while (pids) {
+		if (pids->pid == pid)
+			return true;
+		pids = pids->next;
+	}
+	return false;
+}
+
+/*
+ * Callback handed to tracefs_iterate_raw_events(). Follows the chain of
+ * wake ups starting from the tasks in fields->pids until one of those
+ * tasks hits a kvm_exit, and saves that task in fields->found_pid.
+ *
+ * Returns -1 when the task is found to stop processing, otherwise 0.
+ */
+static int callback(struct tep_event *event, struct tep_record *record, int cpu,
+		    void *data)
+{
+	struct trace_fields *fields = data;
+	struct tep_handle *tep = event->tep;
+	unsigned long long val;
+	int flags;
+	int type;
+	int pid;
+	int ret;
+
+	ret = tep_read_number_field(fields->common_pid, record->data, &val);
+	if (ret < 0)
+		return 0;
+
+	flags = tep_data_flags(tep, record);
+
+	/* Ignore events in interrupts */
+	if (flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
+		return 0;
+
+	/*
+	 * First make sure that this event comes from a PID from
+	 * this task (or a task woken by this task)
+	 */
+	pid = val;
+	if (!match_pid(fields->pids, pid))
+		return 0;
+
+	type = tep_data_type(tep, record);
+
+	/*
+	 * If this event is a kvm_exit, we have our PID
+	 * and we can stop processing.
+	 */
+	if (type == fields->kvm_exit->id) {
+		fields->found_pid = pid;
+		return -1;
+	}
+
+	if (type != fields->sched_waking->id)
+		return 0;
+
+	ret = tep_read_number_field(fields->sched_next, record->data, &val);
+	if (ret < 0)
+		return 0;
+
+	/* This is a task woken by our task or a chain of wake ups */
+	add_pid(&fields->pids, (int)val);
+	return 0;
+}
+
+/*
+ * Read the "Tgid:" line of /proc/<pid>/status.
+ *
+ * Returns the value on that line, or -1 if the file could not be
+ * opened or holds no such line.
+ */
+static int find_tgid(int pid)
+{
+	FILE *fp;
+	char *path;
+	char *buf = NULL;
+	char *save;
+	size_t l = 0;
+	int tgid = -1;
+
+	if (asprintf(&path, "/proc/%d/status", pid) < 0)
+		return -1;
+
+	fp = fopen(path, "r");
+	free(path);
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &l, fp) > 0) {
+		char *tok;
+
+		if (strncmp(buf, "Tgid:", 5) != 0)
+			continue;
+		tok = strtok_r(buf, ":", &save);
+		if (!tok)
+			continue;
+		tok = strtok_r(NULL, ":", &save);
+		if (!tok)
+			continue;
+		/* isspace() must be given an unsigned char value */
+		while (isspace((unsigned char)*tok))
+			tok++;
+		tgid = strtol(tok, NULL, 0);
+		break;
+	}
+	free(buf);
+	fclose(fp);
+
+	return tgid;
+}
+
+/*
+ * Stop tracing in @open_instance, read back the events it recorded to
+ * find the task that called kvm_exit, then destroy and free the instance.
+ *
+ * Returns the Tgid of that task, or -1 if @open_instance is NULL, the
+ * events could not be loaded or no such task was found.
+ */
+static int stop_trace_connect(struct tracefs_instance *open_instance)
+{
+	const char *systems[] = { "kvm", "sched", NULL};
+	struct tep_handle *tep;
+	/* Set found_pid here: it is read after "out" on every exit path */
+	struct trace_fields trace_fields = { .found_pid = -1 };
+	int tgid = -1;
+
+	if (!open_instance)
+		return -1;
+
+	/* The connection is finished, stop tracing, we have what we want */
+	tracefs_trace_off(open_instance);
+	tracefs_instance_file_write(open_instance, "events//enable", "0");
+
+	tep = tracefs_local_events_system(NULL, systems);
+	if (!tep)
+		goto out;
+
+	trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
+	if (!trace_fields.sched_waking)
+		goto out_free_tep;
+	trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
+	if (!trace_fields.kvm_exit)
+		goto out_free_tep;
+	trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking,
+							"common_pid");
+	if (!trace_fields.common_pid)
+		goto out_free_tep;
+	trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking,
+						     "pid");
+	if (!trace_fields.sched_next)
+		goto out_free_tep;
+
+	add_pid(&trace_fields.pids, getpid());
+	tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
+	free_pids(trace_fields.pids);
+ out_free_tep:
+	tep_free(tep);
+ out:
+	tracefs_instance_destroy(open_instance);
+	tracefs_instance_free(open_instance);
+
+	if (trace_fields.found_pid > 0)
+		tgid = find_tgid(trace_fields.found_pid);
+
+	return tgid;
+}
+
+/*
+ * In order to find the guest that is associated to the given cid,
+ * trace the sched_waking and kvm_exit events, connect to the cid
+ * (doesn't matter what port, use -1 to not connect to anything)
+ * and find what task gets woken up from this code and calls kvm_exit,
+ * then that is the task that is running the guest.
+ * Then look at the /proc/<guest-pid>/status file to find the task group
+ * id (Tgid), and this is the PID of the task running all the threads.
+ */
+static void find_pid_by_cid(struct trace_guest *guest)
+{
+	struct tracefs_instance *instance;
+	int fd;
+
+	instance = start_trace_connect();
+	fd = trace_open_vsock(guest->cid, -1);
+	guest->pid = stop_trace_connect(instance);
+	/* Just in case! */
+	if (fd >= 0)
+		close(fd);
+}
+
 struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
 {
 	struct trace_guest *guest = NULL;
@@ -99,8 +358,11 @@ struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
 
 	if (cid > 0) {
 		guest = get_guest_by_cid(cid);
-		if (!guest && name)
+		if (!guest && name) {
 			guest = add_guest(cid, name);
+			if (guest)
+				find_pid_by_cid(guest);
+		}
 	}
 	return guest;
 }
-- 
2.29.2