> On 12-Oct-2023, at 9:20 AM, Namhyung Kim <namhyung@xxxxxxxxxx> wrote: > > The find_data_type() needs many information to describe the location of > the data. Add the new struct data_loc_info to pass those information at > once. > > No functional changes intended. > > Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx> > --- > tools/perf/util/annotate-data.c | 83 +++++++++++++++++---------------- > tools/perf/util/annotate-data.h | 38 ++++++++++++--- > tools/perf/util/annotate.c | 30 ++++++------ > 3 files changed, 91 insertions(+), 60 deletions(-) > > diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c > index 39bbd56b2160..90793cbb6aa0 100644 > --- a/tools/perf/util/annotate-data.c > +++ b/tools/perf/util/annotate-data.c > @@ -256,21 +256,28 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset, > } > > /* The result will be saved in @type_die */ > -static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, > - const char *var_name, struct annotated_op_loc *loc, > - Dwarf_Die *type_die) > +static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die) > { > + struct annotated_op_loc *loc = dloc->op; > Dwarf_Die cu_die, var_die; > Dwarf_Die *scopes = NULL; > int reg, offset; > int ret = -1; > int i, nr_scopes; > int fbreg = -1; > - bool is_fbreg = false; > int fb_offset = 0; > + bool is_fbreg = false; > + u64 pc; > + > + /* > + * IP is a relative instruction address from the start of the map, as > + * it can be randomized/relocated, it needs to translate to PC which is > + * a file address for DWARF processing. 
> + */ > + pc = map__rip_2objdump(dloc->ms->map, dloc->ip); > > /* Get a compile_unit for this address */ > - if (!find_cu_die(di, pc, &cu_die)) { > + if (!find_cu_die(dloc->di, pc, &cu_die)) { > pr_debug("cannot find CU for address %lx\n", pc); > ann_data_stat.no_cuinfo++; > return -1; > @@ -280,18 +287,19 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, > offset = loc->offset; > > if (reg == DWARF_REG_PC) { > - if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { > + if (die_find_variable_by_addr(&cu_die, pc, dloc->var_addr, > + &var_die, &offset)) { > ret = check_variable(&var_die, type_die, offset, > /*is_pointer=*/false); > - loc->offset = offset; > + dloc->type_offset = offset; > goto out; > } > > - if (var_name && die_find_variable_at(&cu_die, var_name, pc, > - &var_die)) { > - ret = check_variable(&var_die, type_die, 0, > + if (dloc->var_name && > + die_find_variable_at(&cu_die, dloc->var_name, pc, &var_die)) { > + ret = check_variable(&var_die, type_die, dloc->type_offset, > /*is_pointer=*/false); > - /* loc->offset will be updated by the caller */ > + /* dloc->type_offset was updated by the caller */ > goto out; > } > } > @@ -308,10 +316,11 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, > dwarf_formblock(&attr, &block) == 0 && block.length == 1) { > switch (*block.data) { > case DW_OP_reg0 ... 
DW_OP_reg31: > - fbreg = *block.data - DW_OP_reg0; > + fbreg = dloc->fbreg = *block.data - DW_OP_reg0; > break; > case DW_OP_call_frame_cfa: > - if (die_get_cfa(di->dbg, pc, &fbreg, > + dloc->fb_cfa = true; > + if (die_get_cfa(dloc->di->dbg, pc, &fbreg, > &fb_offset) < 0) > fbreg = -1; > break; > @@ -329,7 +338,7 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, > /* Search from the inner-most scope to the outer */ > for (i = nr_scopes - 1; i >= 0; i--) { > if (reg == DWARF_REG_PC) { > - if (!die_find_variable_by_addr(&scopes[i], pc, addr, > + if (!die_find_variable_by_addr(&scopes[i], pc, dloc->var_addr, > &var_die, &offset)) > continue; > } else { > @@ -342,7 +351,7 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, > /* Found a variable, see if it's correct */ > ret = check_variable(&var_die, type_die, offset, > reg != DWARF_REG_PC && !is_fbreg); > - loc->offset = offset; > + dloc->type_offset = offset; > goto out; > } > > @@ -361,50 +370,46 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, > > /** > * find_data_type - Return a data type at the location > - * @ms: map and symbol at the location > - * @ip: instruction address of the memory access > - * @loc: instruction operand location > - * @addr: data address of the memory access > - * @var_name: global variable name > + * @dloc: data location > * > * This functions searches the debug information of the binary to get the data > - * type it accesses. The exact location is expressed by (@ip, reg, offset) > - * for pointer variables or (@ip, @addr) for global variables. Note that global > - * variables might update the @loc->offset after finding the start of the variable. > - * If it cannot find a global variable by address, it tried to fine a declaration > - * of the variable using @var_name. In that case, @loc->offset won't be updated. > + * type it accesses. 
The exact location is expressed by (ip, reg, offset) > + * for pointer variables or (ip, addr) for global variables. Note that global > + * variables might update the @dloc->type_offset after finding the start of the > + * variable. If it cannot find a global variable by address, it tried to find > + * a declaration of the variable using var_name. In that case, @dloc->offset > + * won't be updated. > * > * It return %NULL if not found. > */ > -struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, > - struct annotated_op_loc *loc, u64 addr, > - const char *var_name) > +struct annotated_data_type *find_data_type(struct data_loc_info *dloc) > { > struct annotated_data_type *result = NULL; > - struct dso *dso = ms->map->dso; > - struct debuginfo *di; > + struct dso *dso = dloc->ms->map->dso; > Dwarf_Die type_die; > - u64 pc; > > - di = debuginfo__new(dso->long_name); > - if (di == NULL) { > + dloc->di = debuginfo__new(dso->long_name); > + if (dloc->di == NULL) { > pr_debug("cannot get the debug info\n"); > return NULL; > } > > /* > - * IP is a relative instruction address from the start of the map, as > - * it can be randomized/relocated, it needs to translate to PC which is > - * a file address for DWARF processing. > + * The type offset is the same as instruction offset by default. > + * But when finding a global variable, the offset won't be valid. 
> */ > - pc = map__rip_2objdump(ms->map, ip); > - if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0) > + if (dloc->var_name == NULL) > + dloc->type_offset = dloc->op->offset; > + > + dloc->fbreg = -1; > + > + if (find_data_type_die(dloc, &type_die) < 0) > goto out; > > result = dso__findnew_data_type(dso, &type_die); > > out: > - debuginfo__delete(di); > + debuginfo__delete(dloc->di); > return result; > } > > diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h > index 1b0db8e8c40e..ad6493ea2c8e 100644 > --- a/tools/perf/util/annotate-data.h > +++ b/tools/perf/util/annotate-data.h > @@ -8,6 +8,7 @@ > #include <linux/types.h> > > struct annotated_op_loc; > +struct debuginfo; > struct evsel; > struct map_symbol; > > @@ -72,6 +73,35 @@ struct annotated_data_type { > extern struct annotated_data_type unknown_type; > extern struct annotated_data_type stackop_type; > > +/** > + * struct data_loc_info - Data location information > + * @ms: Map and Symbol info > + * @ip: Instruction address > + * @var_addr: Data address (for global variables) > + * @var_name: Variable name (for global variables) > + * @op: Instruction operand location (regs and offset) > + * @di: Debug info > + * @fbreg: Frame base register > + * @fb_cfa: Whether the frame needs to check CFA > + * @type_offset: Final offset in the type > + */ > +struct data_loc_info { > + /* These are input field, should be filled by caller */ > + struct map_symbol *ms; > + u64 ip; > + u64 var_addr; > + const char *var_name; > + struct annotated_op_loc *op; > + > + /* These are used internally */ > + struct debuginfo *di; > + int fbreg; > + bool fb_cfa; > + > + /* This is for the result */ > + int type_offset; > +}; > + > /** > * struct annotated_data_stat - Debug statistics > * @total: Total number of entry > @@ -106,9 +136,7 @@ extern struct annotated_data_stat ann_data_stat; > #ifdef HAVE_DWARF_SUPPORT > > /* Returns data type at the location (ip, reg, offset) */ > -struct 
annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, > - struct annotated_op_loc *loc, u64 addr, > - const char *var_name); > +struct annotated_data_type *find_data_type(struct data_loc_info *dloc); > > /* Update type access histogram at the given offset */ > int annotated_data_type__update_samples(struct annotated_data_type *adt, > @@ -121,9 +149,7 @@ void annotated_data_type__tree_delete(struct rb_root *root); > #else /* HAVE_DWARF_SUPPORT */ > > static inline struct annotated_data_type * > -find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused, > - struct annotated_op_loc *loc __maybe_unused, > - u64 addr __maybe_unused, const char *var_name __maybe_unused) > +find_data_type(struct data_loc_info *dloc __maybe_unused) > { > return NULL; > } Hi Namhyung, I have a thought here. For the type and typeoff sort keys to work, we depend on DWARF support in perf, right? If perf is built without DWARF support (either due to an older elfutils or a missing libdw.h), find_data_type() will return NULL and we will get “unknown” for Data Type and Data Type Offset in the result. Can we add a pr_debug line in the stub version of find_data_type() (the one in the #else branch of HAVE_DWARF_SUPPORT) so that the user can tell whether “unknown” is due to a missing devel package at build time? Or maybe restrict -s type,typeoff if DWARF support is missing?
Thanks Athira > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c > index fe0074bb98f0..1cf55f903ee4 100644 > --- a/tools/perf/util/annotate.c > +++ b/tools/perf/util/annotate.c > @@ -3744,9 +3744,7 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) > struct annotated_op_loc *op_loc; > struct annotated_data_type *mem_type; > struct annotated_item_stat *istat; > - u64 ip = he->ip, addr = 0; > - const char *var_name = NULL; > - int var_offset; > + u64 ip = he->ip; > int i; > > ann_data_stat.total++; > @@ -3794,51 +3792,53 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) > } > > for_each_insn_op_loc(&loc, i, op_loc) { > + struct data_loc_info dloc = { > + .ms = ms, > + /* Recalculate IP for LOCK prefix or insn fusion */ > + .ip = ms->sym->start + dl->al.offset, > + .op = op_loc, > + }; > + > if (!op_loc->mem_ref) > continue; > > /* Recalculate IP because of LOCK prefix or insn fusion */ > ip = ms->sym->start + dl->al.offset; > > - var_offset = op_loc->offset; > - > /* PC-relative addressing */ > if (op_loc->reg1 == DWARF_REG_PC) { > struct addr_location al; > struct symbol *var; > u64 map_addr; > > - addr = annotate_calc_pcrel(ms, ip, op_loc->offset, dl); > + dloc.var_addr = annotate_calc_pcrel(ms, ip, op_loc->offset, dl); > /* Kernel symbols might be relocated */ > - map_addr = addr + map__reloc(ms->map); > + map_addr = dloc.var_addr + map__reloc(ms->map); > > addr_location__init(&al); > var = thread__find_symbol_fb(he->thread, he->cpumode, > map_addr, &al); > if (var) { > - var_name = var->name; > + dloc.var_name = var->name; > /* Calculate type offset from the start of variable */ > - var_offset = map_addr - map__unmap_ip(al.map, var->start); > + dloc.type_offset = map_addr - map__unmap_ip(al.map, var->start); > } > addr_location__exit(&al); > } > > - mem_type = find_data_type(ms, ip, op_loc, addr, var_name); > + mem_type = find_data_type(&dloc); > if (mem_type) > istat->good++; > else > 
istat->bad++; > > - if (mem_type && var_name) > - op_loc->offset = var_offset; > - > if (symbol_conf.annotate_data_sample) { > annotated_data_type__update_samples(mem_type, evsel, > - op_loc->offset, > + dloc.type_offset, > he->stat.nr_events, > he->stat.period); > } > - he->mem_type_off = op_loc->offset; > + he->mem_type_off = dloc.type_offset; > return mem_type; > } > > -- > 2.42.0.655.g421f12c284-goog >