On Tue, Dec 12, 2023 at 6:36 AM James Clark <james.clark@xxxxxxx> wrote: > > > > On 12/12/2023 14:17, James Clark wrote: > > > > > > On 29/11/2023 06:02, Ian Rogers wrote: > >> Rather than iterate all CPUs and see if they are in CPU maps, directly > >> iterate the CPU map. Similarly make use of the intersect > >> function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more > >> appropriate alternatives. > >> > >> Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx> > >> --- > >> tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++---------------- > >> tools/perf/arch/arm64/util/arm-spe.c | 4 +- > >> 2 files changed, 34 insertions(+), 47 deletions(-) > >> > >> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c > >> index 77e6663c1703..a68a72f2f668 100644 > >> --- a/tools/perf/arch/arm/util/cs-etm.c > >> +++ b/tools/perf/arch/arm/util/cs-etm.c > >> @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr, > >> static int cs_etm_validate_config(struct auxtrace_record *itr, > >> struct evsel *evsel) > >> { > >> - int i, err = -EINVAL; > >> + int idx, err = -EINVAL; > >> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus; > >> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); > >> + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus); > >> + struct perf_cpu cpu; > >> > >> - /* Set option of each CPU we have */ > >> - for (i = 0; i < cpu__max_cpu().cpu; i++) { > >> - struct perf_cpu cpu = { .cpu = i, }; > >> - > >> - /* > >> - * In per-cpu case, do the validation for CPUs to work with. > >> - * In per-thread case, the CPU map is empty. Since the traced > >> - * program can run on any CPUs in this case, thus don't skip > >> - * validation. > >> - */ > >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) && > >> - !perf_cpu_map__has(event_cpus, cpu)) > >> - continue; > > > > This has broken validation for per-thread sessions. > > perf_cpu_map__intersect() doesn't seem to be able to handle the case > > where an 'any' map intersected with an online map should return the > > online map. Or at least it should for this to work, and it seems to make > > sense for it to work that way. > > > > At least that was my initial impression, but I only debugged it and saw > > that the loop is now skipped entirely. > > > >> - > >> - if (!perf_cpu_map__has(online_cpus, cpu)) > >> - continue; > >> + perf_cpu_map__put(online_cpus); > >> > >> - err = cs_etm_validate_context_id(itr, evsel, i); > >> + /* > >> + * Set option of each CPU we have. In per-cpu case, do the validation > >> + * for CPUs to work with. In per-thread case, the CPU map is empty. > >> + * Since the traced program can run on any CPUs in this case, thus don't > >> + * skip validation. > >> + */ > >> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) { > >> + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu); > >> if (err) > >> goto out; > >> - err = cs_etm_validate_timestamp(itr, evsel, i); > >> + err = cs_etm_validate_timestamp(itr, evsel, idx); I think this is an error, idx shouldn't be used here, cpu.cpu should. > >> if (err) > >> goto out; > >> } > >> > >> err = 0; > >> out: > >> - perf_cpu_map__put(online_cpus); > >> + perf_cpu_map__put(intersect_cpus); > >> return err; > >> } > >> > >> @@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, > >> * Also the case of per-cpu mmaps, need the contextID in order to be notified > >> * when a context switch happened. > >> */ > >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { > >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) { > >> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, > >> "timestamp", 1); > >> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, > >> @@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, > >> evsel->core.attr.sample_period = 1; > >> > >> /* In per-cpu case, always need the time of mmap events etc */ > >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) > >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) > >> evsel__set_sample_bit(evsel, TIME); > >> > >> err = cs_etm_validate_config(itr, cs_etm_evsel); > >> @@ -533,38 +527,32 @@ static size_t > >> cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, > >> struct evlist *evlist __maybe_unused) > >> { > >> - int i; > >> + int idx; > >> int etmv3 = 0, etmv4 = 0, ete = 0; > >> struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; > >> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); > >> + struct perf_cpu cpu; > >> > >> /* cpu map is not empty, we have specific CPUs to work with */ > >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { > >> - for (i = 0; i < cpu__max_cpu().cpu; i++) { > >> - struct perf_cpu cpu = { .cpu = i, }; > >> - > >> - if (!perf_cpu_map__has(event_cpus, cpu) || > >> - !perf_cpu_map__has(online_cpus, cpu)) > >> - continue; > >> + if (!perf_cpu_map__is_empty(event_cpus)) { > >> + struct perf_cpu_map *intersect_cpus = > >> + perf_cpu_map__intersect(event_cpus, online_cpus); > >> > >> - if (cs_etm_is_ete(itr, i)) > >> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) { > >> + if (cs_etm_is_ete(itr, cpu.cpu)) > > Similar problem here. For a per-thread session, the CPU map is not empty > (it's an 'any' map, presumably length 1), so it comes into this first > if, rather than the else below which is for the 'any' scenario. > > Then the intersect with online CPUs results in an empty map, so no CPU > metadata is recorded, then the session fails. > > If you made the intersect work in the way I mentioned above we could > also delete the else below, because that's just another way to convert > from 'any' to 'all online'. I don't think intersect of "all online" with an "any CPU" should return "all online" as these would be quite different options to perf_event_open. Let's see if the issue above fixes this change otherwise I can revert it to a more mechanical translation of the existing code into the new APIs. Thanks, Ian > >> ete++; > >> - else if (cs_etm_is_etmv4(itr, i)) > >> + else if (cs_etm_is_etmv4(itr, cpu.cpu)) > >> etmv4++; > >> else > >> etmv3++; > >> } > >> + perf_cpu_map__put(intersect_cpus); > >> } else { > >> /* get configuration for all CPUs in the system */ > >> - for (i = 0; i < cpu__max_cpu().cpu; i++) { > >> - struct perf_cpu cpu = { .cpu = i, }; > >> - > >> - if (!perf_cpu_map__has(online_cpus, cpu)) > >> - continue; > >> - > >> - if (cs_etm_is_ete(itr, i)) > >> + perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) { > >> + if (cs_etm_is_ete(itr, cpu.cpu)) > >> ete++; > >> - else if (cs_etm_is_etmv4(itr, i)) > >> + else if (cs_etm_is_etmv4(itr, cpu.cpu)) > >> etmv4++; > >> else > >> etmv3++; > >> @@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, > >> return -EINVAL; > >> > >> /* If the cpu_map is empty all online CPUs are involved */ > >> - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { > >> + if (perf_cpu_map__is_empty(event_cpus)) { > >> cpu_map = online_cpus; > >> } else { > >> /* Make sure all specified CPUs are online */ > >> - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) { > >> - struct perf_cpu cpu = { .cpu = i, }; > >> + struct perf_cpu cpu; > >> > >> - if (perf_cpu_map__has(event_cpus, cpu) && > >> - !perf_cpu_map__has(online_cpus, cpu)) > >> + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) { > >> + if (!perf_cpu_map__has(online_cpus, cpu)) > >> return -EINVAL; > >> } > >> > >> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c > >> index 51ccbfd3d246..0b52e67edb3b 100644 > >> --- a/tools/perf/arch/arm64/util/arm-spe.c > >> +++ b/tools/perf/arch/arm64/util/arm-spe.c > >> @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, > >> * In the case of per-cpu mmaps, sample CPU for AUX event; > >> * also enable the timestamp tracing for samples correlation. > >> */ > >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { > >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) { > >> evsel__set_sample_bit(arm_spe_evsel, CPU); > >> evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel, > >> "ts_enable", 1); > >> @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, > >> tracking_evsel->core.attr.sample_period = 1; > >> > >> /* In per-cpu case, always need the time of mmap events etc */ > >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { > >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) { > >> evsel__set_sample_bit(tracking_evsel, TIME); > >> evsel__set_sample_bit(tracking_evsel, CPU); > >>