Re: [PATCH v3 16/18] perf sched: Fixes for thread safety analysis

Ian Rogers <irogers@xxxxxxxxxx> · Fri, 26 Aug 2022 10:48:00 -0700

On Fri, Aug 26, 2022 at 10:41 AM Adrian Hunter <adrian.hunter@xxxxxxxxx> wrote:
>
> On 26/08/22 19:06, Ian Rogers wrote:
> > On Fri, Aug 26, 2022 at 5:12 AM Adrian Hunter <adrian.hunter@xxxxxxxxx> wrote:
> >>
> >> On 24/08/22 18:38, Ian Rogers wrote:
> >>> Add annotations to describe lock behavior. Add unlocks so that mutexes
> >>> aren't conditionally held on exit from perf_sched__replay. Add an exit
> >>> variable so that thread_func can terminate, rather than leaving the
> >>> threads blocked on mutexes.
> >>>
> >>> Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
> >>> ---
> >>>  tools/perf/builtin-sched.c | 46 ++++++++++++++++++++++++--------------
> >>>  1 file changed, 29 insertions(+), 17 deletions(-)
> >>>
> >>> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> >>> index 7e4006d6b8bc..b483ff0d432e 100644
> >>> --- a/tools/perf/builtin-sched.c
> >>> +++ b/tools/perf/builtin-sched.c
> >>> @@ -246,6 +246,7 @@ struct perf_sched {
> >>>       const char      *time_str;
> >>>       struct perf_time_interval ptime;
> >>>       struct perf_time_interval hist_time;
> >>> +     volatile bool   thread_funcs_exit;
> >>>  };
> >>>
> >>>  /* per thread run time data */
> >>> @@ -633,31 +634,34 @@ static void *thread_func(void *ctx)
> >>>       prctl(PR_SET_NAME, comm2);
> >>>       if (fd < 0)
> >>>               return NULL;
> >>> -again:
> >>> -     ret = sem_post(&this_task->ready_for_work);
> >>> -     BUG_ON(ret);
> >>> -     mutex_lock(&sched->start_work_mutex);
> >>> -     mutex_unlock(&sched->start_work_mutex);
> >>>
> >>> -     cpu_usage_0 = get_cpu_usage_nsec_self(fd);
> >>> +     while (!sched->thread_funcs_exit) {
> >>> +             ret = sem_post(&this_task->ready_for_work);
> >>> +             BUG_ON(ret);
> >>> +             mutex_lock(&sched->start_work_mutex);
> >>> +             mutex_unlock(&sched->start_work_mutex);
> >>>
> >>> -     for (i = 0; i < this_task->nr_events; i++) {
> >>> -             this_task->curr_event = i;
> >>> -             perf_sched__process_event(sched, this_task->atoms[i]);
> >>> -     }
> >>> +             cpu_usage_0 = get_cpu_usage_nsec_self(fd);
> >>>
> >>> -     cpu_usage_1 = get_cpu_usage_nsec_self(fd);
> >>> -     this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
> >>> -     ret = sem_post(&this_task->work_done_sem);
> >>> -     BUG_ON(ret);
> >>> +             for (i = 0; i < this_task->nr_events; i++) {
> >>> +                     this_task->curr_event = i;
> >>> +                     perf_sched__process_event(sched, this_task->atoms[i]);
> >>> +             }
> >>>
> >>> -     mutex_lock(&sched->work_done_wait_mutex);
> >>> -     mutex_unlock(&sched->work_done_wait_mutex);
> >>> +             cpu_usage_1 = get_cpu_usage_nsec_self(fd);
> >>> +             this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
> >>> +             ret = sem_post(&this_task->work_done_sem);
> >>> +             BUG_ON(ret);
> >>>
> >>> -     goto again;
> >>> +             mutex_lock(&sched->work_done_wait_mutex);
> >>> +             mutex_unlock(&sched->work_done_wait_mutex);
> >>> +     }
> >>> +     return NULL;
> >>>  }
> >>>
> >>>  static void create_tasks(struct perf_sched *sched)
> >>> +     EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
> >>> +     EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
> >>>  {
> >>>       struct task_desc *task;
> >>>       pthread_attr_t attr;
> >>> @@ -687,6 +691,8 @@ static void create_tasks(struct perf_sched *sched)
> >>>  }
> >>>
> >>>  static void wait_for_tasks(struct perf_sched *sched)
> >>> +     EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> >>> +     EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
> >>>  {
> >>>       u64 cpu_usage_0, cpu_usage_1;
> >>>       struct task_desc *task;
> >>> @@ -738,6 +744,8 @@ static void wait_for_tasks(struct perf_sched *sched)
> >>>  }
> >>>
> >>>  static void run_one_test(struct perf_sched *sched)
> >>> +     EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> >>> +     EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
> >>>  {
> >>>       u64 T0, T1, delta, avg_delta, fluct;
> >>>
> >>> @@ -3309,11 +3317,15 @@ static int perf_sched__replay(struct perf_sched *sched)
> >>>       print_task_traces(sched);
> >>>       add_cross_task_wakeups(sched);
> >>>
> >>> +     sched->thread_funcs_exit = false;
> >>>       create_tasks(sched);
> >>>       printf("------------------------------------------------------------\n");
> >>>       for (i = 0; i < sched->replay_repeat; i++)
> >>>               run_one_test(sched);
> >>>
> >>> +     sched->thread_funcs_exit = true;
> >>> +     mutex_unlock(&sched->start_work_mutex);
> >>> +     mutex_unlock(&sched->work_done_wait_mutex);
> >>
> >> I think you still need to wait for the threads to exit before
> >> destroying the mutexes.
> >
> > This is a pre-existing issue and beyond the scope of this patch set.
>
> You added the mutex_destroy functions in patch 8, so it is still
> fallout from that.

In the previous code the threads were blocked on mutexes that were
stack allocated and the stack memory went away. You are correct to say
that to those locks I added an init and destroy call. The lifetime of
the mutex was wrong previously and remains wrong in this change.

Thanks,
Ian

> >
> > Thanks,
> > Ian
> >
> >>>       return 0;
> >>>  }
> >>>
> >>
>