On 04/10/2023 01:32, Besar Wicaksono wrote: > The decoder creation for raw trace uses metadata from the first CPU. > On per-cpu mode, this metadata is incorrectly used for every decoder. > On per-process/per-thread traces, the first CPU is CPU0. If CPU0 trace > is not enabled, its metadata will be marked unused and the decoder is > not created. Perf report dump skips the decoding part because the > decoder is missing. > > To fix this, use metadata of the CPU associated with sample object. > > Signed-off-by: Besar Wicaksono <bwicaksono@xxxxxxxxxx> > --- > > Changes from v1: > * Update commit message > * Add fallback to CPU-0 metadata if sample CPU id is not available > * Preserve cs_etm__set_trace_param_* arguments and just breakdown the index > parameter into trace-param and metadata indexes > Thanks to Mike and James for the feedback. > v1: https://lore.kernel.org/lkml/20230919224553.1658-1-bwicaksono@xxxxxxxxxx/T/#u > > --- > tools/perf/util/cs-etm.c | 106 ++++++++++++++++++++++++--------------- > 1 file changed, 65 insertions(+), 41 deletions(-) > There is a trivial conflict on the perf-tools-next repo that it's probably worth rebasing for, otherwise: Reviewed-by: James Clark <james.clark@xxxxxxx> Also should probably include linux-perf-users@xxxxxxxxxxxxxxx as tools changes actually go through the perf tree rather than the coresight one. > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c > index 1419b40dfbe8..3abe68a9981e 100644 > --- a/tools/perf/util/cs-etm.c > +++ b/tools/perf/util/cs-etm.c > @@ -293,22 +293,31 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) > }) > > /* > - * Get a metadata for a specific cpu from an array. > + * Get a metadata index for a specific cpu from an array. > * > */ > -static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) > +static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) > { > int i; > - u64 *metadata = NULL; > > for (i = 0; i < etm->num_cpu; i++) { > if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { > - metadata = etm->metadata[i]; > - break; > + return i; > } > } > > - return metadata; > + return -1; > +} > + > +/* > + * Get a metadata for a specific cpu from an array. > + * > + */ > +static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) > +{ > + int idx = get_cpu_data_idx(etm, cpu); > + > + return (idx != -1) ? etm->metadata[idx] : NULL; > } > > /* > @@ -651,66 +660,80 @@ static void cs_etm__packet_dump(const char *pkt_string) > } > > static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, > - struct cs_etm_auxtrace *etm, int idx, > - u32 etmidr) > + struct cs_etm_auxtrace *etm, int t_idx, > + int m_idx, u32 etmidr) > { > u64 **metadata = etm->metadata; > > - t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); > - t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; > - t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; > + t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr); > + t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR]; > + t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR]; > } > > static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, > - struct cs_etm_auxtrace *etm, int idx) > + struct cs_etm_auxtrace *etm, int t_idx, > + int m_idx) > { > u64 **metadata = etm->metadata; > > - t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; > - t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; > - t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; > - t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; > - t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; > - t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; > - t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; > + t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i; > + t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0]; > + t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1]; > + t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2]; > + t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8]; > + t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR]; > + t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR]; > } > > static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, > - struct cs_etm_auxtrace *etm, int idx) > + struct cs_etm_auxtrace *etm, int t_idx, > + int m_idx) > { > u64 **metadata = etm->metadata; > > - t_params[idx].protocol = CS_ETM_PROTO_ETE; > - t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0]; > - t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1]; > - t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2]; > - t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8]; > - t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR]; > - t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR]; > - t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; > + t_params[t_idx].protocol = CS_ETM_PROTO_ETE; > + t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0]; > + t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1]; > + t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2]; > + t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8]; > + t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR]; > + t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR]; > + t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH]; > } > > static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, > struct cs_etm_auxtrace *etm, > + bool formatted, > + int sample_cpu, > int decoders) > { > - int i; > + int t_idx, m_idx; > u32 etmidr; > u64 architecture; > > - for (i = 0; i < decoders; i++) { > - architecture = etm->metadata[i][CS_ETM_MAGIC]; > + for (t_idx = 0; t_idx < decoders; t_idx++) { > + if (formatted) > + m_idx = t_idx; > + else { > + m_idx = get_cpu_data_idx(etm, sample_cpu); > + if (m_idx == -1) { > + pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n"); > + m_idx = 0; > + } > + } > + > + architecture = etm->metadata[m_idx][CS_ETM_MAGIC]; > > switch (architecture) { > case __perf_cs_etmv3_magic: > - etmidr = etm->metadata[i][CS_ETM_ETMIDR]; > - cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); > + etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR]; > + cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr); > break; > case __perf_cs_etmv4_magic: > - cs_etm__set_trace_param_etmv4(t_params, etm, i); > + cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx); > break; > case __perf_cs_ete_magic: > - cs_etm__set_trace_param_ete(t_params, etm, i); > + cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx); > break; > default: > return -EINVAL; > @@ -1026,7 +1049,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, > } > > static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, > - bool formatted) > + bool formatted, int sample_cpu) > { > struct cs_etm_decoder_params d_params; > struct cs_etm_trace_params *t_params = NULL; > @@ -1051,7 +1074,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, > if (!t_params) > goto out_free; > > - if (cs_etm__init_trace_params(t_params, etm, decoders)) > + if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders)) > goto out_free; > > /* Set decoder parameters to decode trace packets */ > @@ -1091,14 +1114,15 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, > static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, > struct auxtrace_queue *queue, > unsigned int queue_nr, > - bool formatted) > + bool formatted, > + int sample_cpu) > { > struct cs_etm_queue *etmq = queue->priv; > > if (list_empty(&queue->head) || etmq) > return 0; > > - etmq = cs_etm__alloc_queue(etm, formatted); > + etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu); > > if (!etmq) > return -ENOMEM; > @@ -2826,7 +2850,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, > * formatted in piped mode (true). > */ > err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], > - idx, true); > + idx, true, -1); > if (err) > return err; > > @@ -3032,7 +3056,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o > idx = auxtrace_event->idx; > formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); > return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], > - idx, formatted); > + idx, formatted, sample->cpu); > } > > /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */