On Mon, 15 Mar 2021 08:18:19 +0200 "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@xxxxxxxxx> wrote: > Added a barrier in time synchronization threads to ensure the first time > synchronization passed before to start the trace. > > Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@xxxxxxxxx> > --- > lib/trace-cmd/trace-timesync.c | 15 ++++++++++++++- > 1 file changed, 14 insertions(+), 1 deletion(-) > > diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c > index 06853f9d..5995551e 100644 > --- a/lib/trace-cmd/trace-timesync.c > +++ b/lib/trace-cmd/trace-timesync.c > @@ -537,6 +537,7 @@ void tracecmd_tsync_free(struct tracecmd_time_sync *tsync) > tsync_context->sync_size = 0; > pthread_mutex_destroy(&tsync->lock); > pthread_cond_destroy(&tsync->cond); > + pthread_barrier_destroy(&tsync->first_sync); > free(tsync->clock_str); > free(tsync->proto_name); > free(tsync); > @@ -648,6 +649,7 @@ static int tsync_with_guest(struct tracecmd_time_sync *tsync) > int ts_array_size = CLOCK_TS_ARRAY; > struct tsync_proto *proto; > struct timespec timeout; > + bool first = true; > bool end = false; > int ret; > This function should always release the barrier, and not depend on the caller to do so on error. That is, have this: clock_context_init(tsync, &proto, false); - if (!tsync->context) + if (!tsync->context) { + pthread_barrier_wait(&tsync->first_sync); return -1; + } > @@ -666,6 +668,10 @@ static int tsync_with_guest(struct tracecmd_time_sync *tsync) > TRACECMD_TIME_SYNC_CMD_PROBE, > 0, NULL); > ret = tsync_get_sample(tsync, proto, ts_array_size); > + if (first) { > + first = false; > + pthread_barrier_wait(&tsync->first_sync); > + } > if (ret || end) > break; On error here, you will cause the caller to incorrectly call pthread_barrier_wait() again and get stuck. That's why I stated above that it this function must be responsible to release the barrier. This is why barriers can be dangerous. > if (tsync->loop_interval > 0) { > @@ -693,12 +699,17 @@ static int tsync_with_guest(struct tracecmd_time_sync *tsync) > static void *tsync_host_thread(void *data) > { > struct tracecmd_time_sync *tsync = NULL; > + int ret; > > tsync = (struct tracecmd_time_sync *)data; > - tsync_with_guest(tsync); > + ret = tsync_with_guest(tsync); > tracecmd_msg_handle_close(tsync->msg_handle); > tsync->msg_handle = NULL; > > + /* tsync with guest failed, release the barrier */ > + if (ret) > + pthread_barrier_wait(&tsync->first_sync); > + As stated above, do not do this here. -- Steve > pthread_exit(0); > } > > @@ -757,6 +768,7 @@ tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval, > tsync->clock_str = strdup(clock); > pthread_mutex_init(&tsync->lock, NULL); > pthread_cond_init(&tsync->cond, NULL); > + pthread_barrier_init(&tsync->first_sync, NULL, 2); > pthread_attr_init(&attrib); > pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE); > > @@ -767,6 +779,7 @@ tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval, > > if (!get_first_cpu(&pin_mask, &mask_size)) > pthread_setaffinity_np(tsync->thread, mask_size, pin_mask); > + pthread_barrier_wait(&tsync->first_sync); > > if (pin_mask) > CPU_FREE(pin_mask);