On Wed, Oct 13, 2021 at 6:04 AM Steven Rostedt <rostedt@xxxxxxxxxxx> wrote: > > On Thu, 23 Sep 2021 12:45:24 +0300 > "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@xxxxxxxxx> wrote: > > @@ -175,6 +184,12 @@ static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str) > > dir_str, entry->d_name); > > kvm->vcpu_scalings[cpu] = strdup(path); > > } > > + if (!strncmp(entry->d_name, KVM_DEBUG_FRACTION_FILE, > > + strlen(KVM_DEBUG_FRACTION_FILE))) { > > I'm curious, why not just use strcmp(), or does "d_name" have more > characters than the fraction file? Just to be on the safe side. Currently the fraction file is with the longest name, but that may change in the future. > > > + snprintf(path, sizeof(path), "%s/%s", > > + dir_str, entry->d_name); > > + kvm->vcpu_frac[cpu] = strdup(path); > > + } > > } > > } > > if (!kvm->vcpu_offsets[cpu]) > > @@ -189,6 +204,8 @@ error: > > kvm->vcpu_offsets[cpu] = NULL; > > free(kvm->vcpu_scalings[cpu]); > > kvm->vcpu_scalings[cpu] = NULL; > > + free(kvm->vcpu_frac[cpu]); > > + kvm->vcpu_frac[cpu] = NULL; > > return -1; > > } > > > > @@ -254,7 +271,8 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync, > > kvm->vcpu_count = tsync->vcpu_count; > > kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *)); > > kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *)); > > - if (!kvm->vcpu_offsets || !kvm->vcpu_scalings) > > + kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *)); > > + if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac) > > goto error; > > if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0) > > goto error; > > @@ -263,6 +281,7 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync, > > error: > > free(kvm->vcpu_offsets); > > free(kvm->vcpu_scalings); > > + free(kvm->vcpu_frac); > > return -1; > > } > > > > @@ -353,6 +372,8 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync) > > kvm->vcpu_offsets[i] = NULL; > > free(kvm->vcpu_scalings[i]); > > kvm->vcpu_scalings[i] = NULL; > > + free(kvm->vcpu_frac[i]); > > + kvm->vcpu_frac[i] = NULL; > > } > > if (kvm->tep) > > tep_free(kvm->tep); > > @@ -364,7 +385,7 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync) > > } > > > > static int kvm_clock_host(struct tracecmd_time_sync *tsync, > > - long long *offset, long long *scaling, > > + long long *offset, long long *scaling, long long *frac, > > long long *timestamp, unsigned int cpu) > > { > > char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; > > @@ -374,6 +395,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync, > > long long kvm_scaling = 1; > ^^^ > Hmm, this is initialized here. > > > unsigned int sync_msg; > > long long kvm_offset; > > + long long kvm_frac; > > unsigned int size; > > char *msg; > > int ret; > > @@ -388,12 +410,18 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync, > > ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset); > > if (ret < 0) > > return -1; > > + > > + kvm_scaling = 1; > > Why are you adding the above? you are right, this one is useless. > > > if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) { > > read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling); > > if (kvm_scaling == KVM_SCALING_AMD_DEFAULT || > > kvm_scaling == KVM_SCALING_INTEL_DEFAULT) > > kvm_scaling = 1; > > } > > + > > + kvm_frac = 0; > > Should probably initialize that at the beginning too. > > > + if (kvm->vcpu_frac && kvm->vcpu_frac[cpu]) > > + ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac); > > msg = (char *)&packet; > > size = sizeof(packet); > > ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, > > @@ -405,6 +433,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync, > > > > packet.offset = -kvm_offset; > > packet.scaling = kvm_scaling; > > + packet.frac = kvm_frac; > > ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME, > > KVM_SYNC_PKT_RESPONSE, sizeof(packet), > > (char *)&packet); > > @@ -413,6 +442,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync, > > > > *scaling = packet.scaling; > > *offset = packet.offset; > > + *frac = kvm_frac; > > *timestamp = packet.ts; > > > > return 0; > > @@ -444,10 +474,10 @@ static int kvm_marker_find(struct tep_event *event, struct tep_record *record, > > return 0; > > } > > > > - > > static int kvm_clock_guest(struct tracecmd_time_sync *tsync, > > long long *offset, > > long long *scaling, > > + long long *frac, > > long long *timestamp) > > { > > char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; > > @@ -488,12 +518,13 @@ static int kvm_clock_guest(struct tracecmd_time_sync *tsync, > > > > *scaling = packet.scaling; > > *offset = packet.offset; > > + *frac = packet.frac; > > *timestamp = packet.ts; > > return 0; > > } > > > > static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync, > > - long long *offset, long long *scaling, > > + long long *offset, long long *scaling, long long *frac, > > long long *timestamp, unsigned int cpu) > > { > > struct clock_sync_context *clock_context; > > @@ -505,9 +536,9 @@ static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync, > > clock_context = (struct clock_sync_context *)tsync->context; > > > > if (clock_context->is_guest) > > - ret = kvm_clock_guest(tsync, offset, scaling, timestamp); > > + ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp); > > else > > - ret = kvm_clock_host(tsync, offset, scaling, timestamp, cpu); > > + ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu); > > return ret; > > } > > > > diff --git a/lib/trace-cmd/trace-timesync-ptp.c b/lib/trace-cmd/trace-timesync-ptp.c > > index b05f1cd0..70242ee3 100644 > > --- a/lib/trace-cmd/trace-timesync-ptp.c > > +++ b/lib/trace-cmd/trace-timesync-ptp.c > > @@ -663,7 +663,7 @@ static int ptp_clock_server(struct tracecmd_time_sync *tsync, > > } > > > > static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync, > > - long long *offset, long long *scaling, > > + long long *offset, long long *scaling, long long *frac, > > long long *timestamp, unsigned int cpu) > > { > > struct clock_sync_context *clock_context; > > @@ -689,6 +689,8 @@ static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync, > > > > if (scaling) > > *scaling = 1; > > + if (frac) > > + *frac = 0; > > if (clock_context->is_server) > > ret = ptp_clock_server(tsync, offset, timestamp); > > else > > diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c > > index 19ca19d7..298913a1 100644 > > --- a/lib/trace-cmd/trace-timesync.c > > +++ b/lib/trace-cmd/trace-timesync.c > > @@ -36,7 +36,7 @@ struct tsync_proto { > > int (*clock_sync_init)(struct tracecmd_time_sync *clock_context); > > int (*clock_sync_free)(struct tracecmd_time_sync *clock_context); > > int (*clock_sync_calc)(struct tracecmd_time_sync *clock_context, > > - long long *offset, long long *scaling, > > + long long *offset, long long *scaling, long long *frac, > > long long *timestamp, unsigned int cpu); > > }; > > > > @@ -76,7 +76,7 @@ int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int role > > int (*init)(struct tracecmd_time_sync *), > > int (*free)(struct tracecmd_time_sync *), > > int (*calc)(struct tracecmd_time_sync *, > > - long long *, long long *, > > + long long *, long long *, long long *, > > long long *, unsigned int)) > > { > > struct tsync_proto *proto = NULL; > > @@ -137,12 +137,13 @@ bool __hidden tsync_proto_is_supported(const char *proto_name) > > * @ts: Array of size @count containing timestamps of callculated offsets > > * @offsets: array of size @count, containing offsets for each timestamp > > * @scalings: array of size @count, containing scaling ratios for each timestamp > > + * @frac: array of size @count, containing fraction bits for each timestamp > > * > > * Retuns -1 in case of an error, or 0 otherwise > > */ > > int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu, > > int *count, long long **ts, > > - long long **offsets, long long **scalings) > > + long long **offsets, long long **scalings, long long **frac) > > { > > struct clock_sync_context *tsync_context; > > > > @@ -159,6 +160,8 @@ int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu, > > *offsets = tsync_context->offsets[cpu].sync_offsets; > > if (scalings) > > *scalings = tsync_context->offsets[cpu].sync_scalings; > > + if (frac) > > + *frac = tsync_context->offsets[cpu].sync_frac; > > > > return 0; > > } > > @@ -564,9 +567,11 @@ void tracecmd_tsync_free(struct tracecmd_time_sync *tsync) > > free(tsync_context->offsets[i].sync_ts); > > free(tsync_context->offsets[i].sync_offsets); > > free(tsync_context->offsets[i].sync_scalings); > > + free(tsync_context->offsets[i].sync_frac); > > tsync_context->offsets[i].sync_ts = NULL; > > tsync_context->offsets[i].sync_offsets = NULL; > > tsync_context->offsets[i].sync_scalings = NULL; > > + tsync_context->offsets[i].sync_frac = NULL; > > tsync_context->offsets[i].sync_count = 0; > > tsync_context->offsets[i].sync_size = 0; > > } > > @@ -643,10 +648,11 @@ static int tsync_send(struct tracecmd_time_sync *tsync, > > long long timestamp = 0; > > long long scaling = 0; > > long long offset = 0; > > + long long frac = 0; > > int ret; > > > > old_set = pin_to_cpu(cpu); > > - ret = proto->clock_sync_calc(tsync, &offset, &scaling, ×tamp, cpu); > > + ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu); > > if (old_set) > > restore_pin_to_cpu(old_set); > > > > @@ -685,10 +691,11 @@ static void tsync_with_host(struct tracecmd_time_sync *tsync) > > } > > > > static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step, > > - long long offset, long long scaling, long long ts) > > + long long offset, long long scaling, long long frac, long long ts) > > { > > long long *sync_scalings = NULL; > > long long *sync_offsets = NULL; > > + long long *sync_frac = NULL; > > long long *sync_ts = NULL; > > > > if (offsets->sync_count >= offsets->sync_size) { > > @@ -698,22 +705,27 @@ static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step > > (offsets->sync_size + array_step) * sizeof(long long)); > > sync_scalings = realloc(offsets->sync_scalings, > > (offsets->sync_size + array_step) * sizeof(long long)); > > + sync_frac = realloc(offsets->sync_frac, > > + (offsets->sync_size + array_step) * sizeof(long long)); > > > > - if (!sync_ts || !sync_offsets || !sync_scalings) { > > + if (!sync_ts || !sync_offsets || !sync_scalings || !sync_frac) { > > free(sync_ts); > > free(sync_offsets); > > free(sync_scalings); > > + free(sync_frac); > > return -1; > > } > > offsets->sync_size += array_step; > > offsets->sync_ts = sync_ts; > > offsets->sync_offsets = sync_offsets; > > offsets->sync_scalings = sync_scalings; > > + offsets->sync_frac = sync_frac; > > } > > > > offsets->sync_ts[offsets->sync_count] = ts; > > offsets->sync_offsets[offsets->sync_count] = offset; > > offsets->sync_scalings[offsets->sync_count] = scaling; > > + offsets->sync_frac[offsets->sync_count] = frac; > > offsets->sync_count++; > > > > return 0; > > @@ -726,9 +738,10 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu, > > long long timestamp = 0; > > long long scaling = 0; > > long long offset = 0; > > + long long frac = 0; > > int ret; > > > > - ret = proto->clock_sync_calc(tsync, &offset, &scaling, ×tamp, cpu); > > + ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu); > > if (ret) { > > tracecmd_warning("Failed to synchronize timestamps with guest"); > > return -1; > > @@ -739,7 +752,7 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu, > > if (!clock || cpu >= clock->cpu_count || !clock->offsets) > > return -1; > > return record_sync_sample(&clock->offsets[cpu], array_step, > > - offset, scaling, timestamp); > > + offset, scaling, frac, timestamp); > > } > > > > #define TIMER_SEC_NANO 1000000000LL > > @@ -928,6 +941,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, > > unsigned int flags; > > long long *scalings = NULL; > > long long *offsets = NULL; > > + long long *frac = NULL; > > long long *ts = NULL; > > int vcount; > > int count; > > @@ -936,7 +950,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, > > > > if (!tsync->vcpu_count) > > return -1; > > - vcount = 3 + (4 * tsync->vcpu_count); > > + vcount = 3 + (5 * tsync->vcpu_count); > > vector = calloc(vcount, sizeof(struct iovec)); > > if (!vector) > > return -1; > > @@ -955,7 +969,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, > > if (j >= vcount) > > break; > > ret = tracecmd_tsync_get_offsets(tsync, i, &count, > > - &ts, &offsets, &scalings); > > + &ts, &offsets, &scalings, NULL); > > if (ret < 0 || !count || !ts || !offsets || !scalings) > > break; > > vector[j].iov_len = 4; > > @@ -971,6 +985,21 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, > > ret = -1; > > goto out; > > } > > Should have a comment here to why this is a separate loop. I'm guessing > that the frac isn't added in the above loop due to backward compatibility? Yes, because of backward compatibility. Going to describe it in the next version. > > > + for (i = 0; i < tsync->vcpu_count; i++) { > > + if (j >= vcount) > > + break; > > + ret = tracecmd_tsync_get_offsets(tsync, i, NULL, > > + NULL, NULL, NULL, &frac); > > + if (ret < 0 || !frac) > > + break; > > + vector[j].iov_len = 8 * count; > > + vector[j++].iov_base = frac; > > + } > > + if (i < tsync->vcpu_count) { > > + ret = -1; > > + goto out; > > Hmm, so if frac doesn't exist we don't use this? Is this used if we > only have a tsc_offset but no scaling? > You are right, actually frac 0 could be a valid value - the check inside the loop "!frac" must be removed. If there is only tsc_offset, the logic should work. > -- Steve > > > + } > > + > > tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount); > > #ifdef TSYNC_DEBUG > > if (count > 1) > -- Tzvetomir (Ceco) Stoyanov VMware Open Source Technology Center