From: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx> There are some setups where the vcpu directories in: /sys/kernel/debug/kvm/<pid>/vcpuX Does not match the CPU of the guest. That is, the guest CPU 3 may be vcpu8. This breaks the algorithm used when setting up the kvm time synchronization. As it expects these files to map to the CPU used, and if one of the vcpuX is greater than the number of CPUs of the guest, it will fail. Currently, the locations that do this at least have the guest CPUS consecutive (non sparse) and they map to the kvm vcpuX in order. That is, if the vcpuX is sorted by the X it will then map to the CPUs of the guest. This may not hold true for all setups, but we are currently just going to solve this one. It appears that the vcpuX in kvm maps to the guest's apicid in /proc/cpuinfo. More can be done later to make sure the two are in order and if not to put in meta data that will allow the kvm mappings to still work. For now restructure the code by creating a single kvm_clock_files structure to hold the offset, scaling and frac arrays in a single structure, and also add the vcpu to it when adding new information from the vcpuX directories. Then sort the list by the vcpu such that it should map to the 0-nr_cpus of the guest. Link: https://lore.kernel.org/all/20220504010242.1388192-1-vineethrp@xxxxxxxxxx/ Reported-by: Vineeth Pillai <vineethrp@xxxxxxxxxx> Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx> --- lib/trace-cmd/trace-timesync-kvm.c | 104 ++++++++++++++++------------- 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/lib/trace-cmd/trace-timesync-kvm.c b/lib/trace-cmd/trace-timesync-kvm.c index 1db63d94f545..aa1799b1eefb 100644 --- a/lib/trace-cmd/trace-timesync-kvm.c +++ b/lib/trace-cmd/trace-timesync-kvm.c @@ -33,15 +33,20 @@ typedef __s64 s64; #define KVM_ACCURACY 0 #define KVM_NAME "kvm" +struct kvm_clock_files { + int vcpu; + char *offsets; + char *scalings; + char *frac; +}; + struct kvm_clock_sync { - int vcpu_count; - char **vcpu_offsets; - char **vcpu_scalings; - char **vcpu_frac; - int marker_fd; - struct tep_handle *tep; - int raw_id; - unsigned long long ts; + int vcpu_count; + int marker_fd; + struct kvm_clock_files *clock_files; + struct tep_handle *tep; + int raw_id; + unsigned long long ts; }; struct kvm_clock_offset_msg { @@ -210,7 +215,7 @@ static bool kvm_support_check(bool guest) return kvm_scaling_check(); } -static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str) +static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int i, char *dir_str) { struct dirent *entry; char path[PATH_MAX]; @@ -224,21 +229,21 @@ static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str) if (!strcmp(entry->d_name, KVM_DEBUG_OFFSET_FILE)) { snprintf(path, sizeof(path), "%s/%s", dir_str, entry->d_name); - kvm->vcpu_offsets[cpu] = strdup(path); + kvm->clock_files[i].offsets = strdup(path); } if (!strcmp(entry->d_name, KVM_DEBUG_SCALING_FILE)) { snprintf(path, sizeof(path), "%s/%s", dir_str, entry->d_name); - kvm->vcpu_scalings[cpu] = strdup(path); + kvm->clock_files[i].scalings = strdup(path); } if (!strcmp(entry->d_name, KVM_DEBUG_FRACTION_FILE)) { snprintf(path, sizeof(path), "%s/%s", dir_str, entry->d_name); - kvm->vcpu_frac[cpu] = strdup(path); + kvm->clock_files[i].frac = strdup(path); } } } - if (!kvm->vcpu_offsets[cpu]) + if (!kvm->clock_files[i].offsets) goto error; closedir(dir); return 0; @@ -246,15 +251,25 @@ static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str) error: if (dir) closedir(dir); - free(kvm->vcpu_offsets[cpu]); - kvm->vcpu_offsets[cpu] = NULL; - free(kvm->vcpu_scalings[cpu]); - kvm->vcpu_scalings[cpu] = NULL; - free(kvm->vcpu_frac[cpu]); - kvm->vcpu_frac[cpu] = NULL; + free(kvm->clock_files[i].offsets); + kvm->clock_files[i].offsets = NULL; + free(kvm->clock_files[i].scalings); + kvm->clock_files[i].scalings = NULL; + free(kvm->clock_files[i].frac); + kvm->clock_files[i].frac = NULL; return -1; } +static int cmp_clock(const void *A, const void *B) +{ + const struct kvm_clock_files *a = A; + const struct kvm_clock_files *b = B; + + if (a->vcpu < b->vcpu) + return -1; + return a->vcpu > b->vcpu; +} + static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid) { char *vm_dir_str = NULL; @@ -284,21 +299,25 @@ static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid) dir = opendir(vm_dir_str); if (!dir) goto error; + i = 0; while ((entry = readdir(dir))) { if (!(entry->d_type == DT_DIR && !strncmp(entry->d_name, KVM_DEBUG_VCPU_DIR, strlen(KVM_DEBUG_VCPU_DIR)))) continue; - vcpu = strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10); - if (vcpu < 0 || vcpu >= kvm->vcpu_count) - continue; - snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name); - if (kvm_open_vcpu_dir(kvm, vcpu, path) < 0) + if (i == kvm->vcpu_count) goto error; - } - for (i = 0; i < kvm->vcpu_count; i++) { - if (!kvm->vcpu_offsets[i]) + vcpu = strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10); + kvm->clock_files[i].vcpu = vcpu; + snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name); + if (kvm_open_vcpu_dir(kvm, i, path) < 0) goto error; + i++; } + if (i < kvm->vcpu_count) + goto error; + + qsort(kvm->clock_files, kvm->vcpu_count, sizeof(*kvm->clock_files), cmp_clock); + closedir(dir); free(pid_str); free(vm_dir_str); @@ -315,19 +334,15 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync, struct kvm_clock_sync *kvm) { kvm->vcpu_count = tsync->vcpu_count; - kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *)); - kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *)); - kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *)); - if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac) + kvm->clock_files = calloc(kvm->vcpu_count, sizeof(*kvm->clock_files)); + if (!kvm->clock_files) goto error; if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0) goto error; return 0; error: - free(kvm->vcpu_offsets); - free(kvm->vcpu_scalings); - free(kvm->vcpu_frac); + free(kvm->clock_files); return -1; } @@ -414,12 +429,9 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync) kvm = (struct kvm_clock_sync *)clock_context->proto_data; if (kvm) { for (i = 0; i < kvm->vcpu_count; i++) { - free(kvm->vcpu_offsets[i]); - kvm->vcpu_offsets[i] = NULL; - free(kvm->vcpu_scalings[i]); - kvm->vcpu_scalings[i] = NULL; - free(kvm->vcpu_frac[i]); - kvm->vcpu_frac[i] = NULL; + free(kvm->clock_files[i].offsets); + free(kvm->clock_files[i].scalings); + free(kvm->clock_files[i].frac); } if (kvm->tep) tep_free(kvm->tep); @@ -449,23 +461,23 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync, clock_context = (struct clock_sync_context *)tsync->context; if (clock_context) kvm = (struct kvm_clock_sync *)clock_context->proto_data; - if (!kvm || !kvm->vcpu_offsets || !kvm->vcpu_offsets[0]) + if (!kvm || !kvm->clock_files || !kvm->clock_files[0].offsets) return -1; if (cpu >= kvm->vcpu_count) return -1; - ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset); + ret = read_ll_from_file(kvm->clock_files[cpu].offsets, &kvm_offset); if (ret < 0) return -1; - if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) { - read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling); + if (kvm->clock_files[cpu].scalings) { + read_ll_from_file(kvm->clock_files[cpu].scalings, &kvm_scaling); if (kvm_scaling == KVM_SCALING_AMD_DEFAULT || kvm_scaling == KVM_SCALING_INTEL_DEFAULT) kvm_scaling = 1; } - if (kvm->vcpu_frac && kvm->vcpu_frac[cpu] && kvm_scaling != 1) - ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac); + if (kvm->clock_files[cpu].frac && kvm_scaling != 1) + ret = read_ll_from_file(kvm->clock_files[cpu].frac, &kvm_frac); msg = (char *)&packet; size = sizeof(packet); ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, -- 2.35.1