Hi Mike, Looks like I missed replying to your other comments on the code change. I apologize for that. Please see more of my comments inline. > -----Original Message----- > From: Mike Leach <mike.leach@xxxxxxxxxx> > Sent: Wednesday, September 20, 2023 3:46 AM > To: Besar Wicaksono <bwicaksono@xxxxxxxxxx> > Cc: james.clark@xxxxxxx; suzuki.poulose@xxxxxxx; linux-arm- > kernel@xxxxxxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; > coresight@xxxxxxxxxxxxxxxx; linux-tegra@xxxxxxxxxxxxxxx; Thierry Reding > <treding@xxxxxxxxxx>; Jonathan Hunter <jonathanh@xxxxxxxxxx>; Vikram > Sethi <vsethi@xxxxxxxxxx>; Richard Wiley <rwiley@xxxxxxxxxx>; Yifei Wan > <ywan@xxxxxxxxxx> > Subject: Re: [PATCH] perf cs-etm: Fix missing decoder for per-process trace > > External email: Use caution opening links or attachments > > > Hi, > > Can you provide a perf command line for both the record and decode > phases that demonstrates the problem you are having? > perf record commands: - perf record -e cs_etm//u -- taskset -c 3,4 ./test - perf record -e cs_etm//u --per-thread -- taskset -c 3,4 ./test Perf report command: - perf report --stdio --dump > Also note the cpu / trace protocol types > This is on Neoverse V2 with ETE source and TRBE sink. > On Tue, 19 Sept 2023 at 23:47, Besar Wicaksono <bwicaksono@xxxxxxxxxx> > wrote: > > > > The decoder creation for raw trace uses metadata from the first CPU. > > On per-process/per-thread traces, the first CPU is CPU0. If CPU0 trace > > is not enabled, its metadata will be marked unused and the decoder is > > not created. Perf report dump skips the decoding part because the > > decoder is missing. > > > > To fix this, use metadata of the CPU associated with sample object. > > > > Signed-off-by: Besar Wicaksono <bwicaksono@xxxxxxxxxx> > > --- > > tools/perf/util/cs-etm.c | 130 +++++++++++++++++++++++---------------- > > 1 file changed, 77 insertions(+), 53 deletions(-) > > > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c > > index 9729d006550d..3c3179a5eac6 100644 > > --- a/tools/perf/util/cs-etm.c > > +++ b/tools/perf/util/cs-etm.c > > @@ -640,71 +640,94 @@ static void cs_etm__packet_dump(const char > *pkt_string) > > fflush(stdout); > > } > > > > -static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params > *t_params, > > - struct cs_etm_auxtrace *etm, int idx, > > - u32 etmidr) > > +static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params > *t_param, > > + u64 *metadata, u32 etmidr) > > { > > - u64 **metadata = etm->metadata; > > - > > - t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); > > - t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; > > - t_params[idx].etmv3.reg_trc_id = > metadata[idx][CS_ETM_ETMTRACEIDR]; > > + t_param->protocol = cs_etm__get_v7_protocol_version(etmidr); > > + t_param->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR]; > > + t_param->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR]; > > } > > > > This is inherently incorrect - you are assuming that all trace devices > on a given system are the same and have the same parameters. This does > not have to be the case. > The intention here is the t_params index and metadata index could be different. I was letting cs_etm__init_trace_params to provide the correct t_param and metadata pointer. Maybe a different way is just by adding separate index argument for the metadata ? For example: @@ -641,14 +641,14 @@ static void cs_etm__packet_dump(const char *pkt_string) } static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm, int idx, - u32 etmidr) + struct cs_etm_auxtrace *etm, int t_idx, + int m_idx, u32 etmidr) { u64 **metadata = etm->metadata; - t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); - t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; - t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; + t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr); + t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR]; + t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR]; } What do you think ? > > > -static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params > *t_params, > > - struct cs_etm_auxtrace *etm, int idx) > > +static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params > *t_param, > > + u64 *metadata) > > { > > - u64 **metadata = etm->metadata; > > + t_param->protocol = CS_ETM_PROTO_ETMV4i; > > + t_param->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0]; > > + t_param->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1]; > > + t_param->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2]; > > + t_param->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8]; > > + t_param->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR]; > > + t_param->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR]; > > +} > > > > - t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; > > - t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; > > - t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; > > - t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; > > - t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; > > - t_params[idx].etmv4.reg_configr = > metadata[idx][CS_ETMV4_TRCCONFIGR]; > > - t_params[idx].etmv4.reg_traceidr = > metadata[idx][CS_ETMV4_TRCTRACEIDR]; > > +static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params > *t_param, > > + u64 *metadata) > > +{ > > + t_param->protocol = CS_ETM_PROTO_ETE; > > + t_param->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0]; > > + t_param->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1]; > > + t_param->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2]; > > + t_param->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8]; > > + t_param->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR]; > > + t_param->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR]; > > + t_param->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH]; > > } > > > > as above for ETE & ETMv4 > > > -static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params > *t_params, > > - struct cs_etm_auxtrace *etm, int idx) > > +static int cs_etm__set_trace_param(struct cs_etm_trace_params *t_param, > > + u64 *metadata) > > { > > - u64 **metadata = etm->metadata; > > + u32 etmidr; > > + u64 architecture = metadata[CS_ETM_MAGIC]; > > + > > + switch (architecture) { > > + case __perf_cs_etmv3_magic: > > + etmidr = metadata[CS_ETM_ETMIDR]; > > + cs_etm__set_trace_param_etmv3(t_param, metadata, etmidr); > > + break; > > + case __perf_cs_etmv4_magic: > > + cs_etm__set_trace_param_etmv4(t_param, metadata); > > + break; > > + case __perf_cs_ete_magic: > > + cs_etm__set_trace_param_ete(t_param, metadata); > > + break; > > + default: > > + return -EINVAL; > > + } > > > > - t_params[idx].protocol = CS_ETM_PROTO_ETE; > > - t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0]; > > - t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1]; > > - t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2]; > > - t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8]; > > - t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR]; > > - t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR]; > > - t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; > > + return 0; > > } > > > > static int cs_etm__init_trace_params(struct cs_etm_trace_params > *t_params, > > struct cs_etm_auxtrace *etm, > > + bool formatted, > > + int sample_cpu, > > int decoders) > > { > > - int i; > > - u32 etmidr; > > - u64 architecture; > > - > > - for (i = 0; i < decoders; i++) { > > - architecture = etm->metadata[i][CS_ETM_MAGIC]; > > + int i, ret; > > + struct cs_etm_trace_params *t_param; > > + u64 *metadata; > > > > - switch (architecture) { > > - case __perf_cs_etmv3_magic: > > - etmidr = etm->metadata[i][CS_ETM_ETMIDR]; > > - cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); > > - break; > > - case __perf_cs_etmv4_magic: > > - cs_etm__set_trace_param_etmv4(t_params, etm, i); > > - break; > > - case __perf_cs_ete_magic: > > - cs_etm__set_trace_param_ete(t_params, etm, i); > > - break; > > - default: > > + if (!formatted) { > > + /* > > + * There is only one decoder when unformatted. Use metadata of > > + * sample AUX cpu. > > + */ > > > Unformatted ETE data should not only have a single decoder - there > must be a decoder per CPU - as per all trace decode flows. The key > difference with ETE is that it usually has its own trace sink (TRBE), > so no formatting is needed, but per cpu decoder is still required. > The change is in the context of queue allocation. My understanding is there is one CPU per queue when unformatted, which I think is aligned with your statement. I could mention one decoder per CPU to clarify more. Pasting the documentation under cs_etm__alloc_queue: /* * Each queue can only contain data from one CPU when unformatted, so only one decoder is * needed. */ Thanks, Besar > Regards > > Mike > > > > + t_param = t_params; > > + metadata = get_cpu_data(etm, sample_cpu); > > + if (!metadata) { > > + pr_err("CS_ETM: invalid sample CPU: %d\n", sample_cpu); > > return -EINVAL; > > } > > + > > + return cs_etm__set_trace_param(t_param, metadata); > > + } > > + > > + for (i = 0; i < decoders; i++) { > > + t_param = &t_params[i]; > > + metadata = etm->metadata[i]; > > + ret = cs_etm__set_trace_param(t_param, metadata); > > + if (ret) > > + return ret; > > } > > > > return 0; > > @@ -1016,7 +1039,7 @@ static u32 cs_etm__mem_access(struct > cs_etm_queue *etmq, u8 trace_chan_id, > > } > > > > static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace > *etm, > > - bool formatted) > > + bool formatted, int sample_cpu) > > { > > struct cs_etm_decoder_params d_params; > > struct cs_etm_trace_params *t_params = NULL; > > @@ -1041,7 +1064,7 @@ static struct cs_etm_queue > *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, > > if (!t_params) > > goto out_free; > > > > - if (cs_etm__init_trace_params(t_params, etm, decoders)) > > + if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, > decoders)) > > goto out_free; > > > > /* Set decoder parameters to decode trace packets */ > > @@ -1081,14 +1104,15 @@ static struct cs_etm_queue > *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, > > static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, > > struct auxtrace_queue *queue, > > unsigned int queue_nr, > > - bool formatted) > > + bool formatted, > > + int sample_cpu) > > { > > struct cs_etm_queue *etmq = queue->priv; > > > > if (list_empty(&queue->head) || etmq) > > return 0; > > > > - etmq = cs_etm__alloc_queue(etm, formatted); > > + etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu); > > > > if (!etmq) > > return -ENOMEM; > > @@ -2816,7 +2840,7 @@ static int > cs_etm__process_auxtrace_event(struct perf_session *session, > > * formatted in piped mode (true). > > */ > > err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], > > - idx, true); > > + idx, true, -1); > > if (err) > > return err; > > > > @@ -3022,7 +3046,7 @@ static int cs_etm__queue_aux_fragment(struct > perf_session *session, off_t file_o > > idx = auxtrace_event->idx; > > formatted = !(aux_event->flags & > PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); > > return cs_etm__setup_queue(etm, &etm- > >queues.queue_array[idx], > > - idx, formatted); > > + idx, formatted, sample->cpu); > > } > > > > /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not > found' */ > > -- > > 2.34.1 > > > > > -- > Mike Leach > Principal Engineer, ARM Ltd. > Manchester Design Centre. UK