On Wed, 2023-01-25 at 18:28 +0000, Alan Maguire wrote: > On 25/01/2023 17:47, Eduard Zingerman wrote: > > On Tue, 2023-01-24 at 13:45 +0000, Alan Maguire wrote: > > > Compilation generates DWARF at several stages, and often the > > > later DWARF representations more accurately represent optimizations > > > that have occurred during compilation. > > > > > > In particular, parameter representations can be spotted by their > > > abstract origin references to the original parameter, but they > > > often have more accurate location information. In most cases, > > > the parameter locations will match calling conventions, and be > > > registers for the first 6 parameters on x86_64, first 8 on ARM64 > > > etc. If the parameter is not a register when it should be however, > > > it is likely passed via the stack or the compiler has used a > > > constant representation instead. > > > > > > This change adds a field to parameters and their associated > > > ftype to note if a parameter has been optimized out. Having > > > this information allows us to skip such functions, as their > > > presence in CUs makes BTF encoding impossible. > > > > > > Signed-off-by: Alan Maguire <alan.maguire@xxxxxxxxxx> > > > --- > > > dwarf_loader.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > > dwarves.h | 4 +++- > > > 2 files changed, 77 insertions(+), 3 deletions(-) > > > > > > diff --git a/dwarf_loader.c b/dwarf_loader.c > > > index 5a74035..0220f1d 100644 > > > --- a/dwarf_loader.c > > > +++ b/dwarf_loader.c > > > @@ -992,13 +992,67 @@ static struct class_member *class_member__new(Dwarf_Die *die, struct cu *cu, > > > return member; > > > } > > > > > > -static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) > > > +/* How many function parameters are passed via registers? Used below in > > > + * determining if an argument has been optimized out or if it is simply > > > + * an argument > NR_REGISTER_PARAMS. 
Setting NR_REGISTER_PARAMS to 0 > > > + * allows unsupported architectures to skip tagging optimized-out > > > + * values. > > > + */ > > > +#if defined(__x86_64__) > > > +#define NR_REGISTER_PARAMS 6 > > > +#elif defined(__s390__) > > > +#define NR_REGISTER_PARAMS 5 > > > +#elif defined(__aarch64__) > > > +#define NR_REGISTER_PARAMS 8 > > > +#elif defined(__mips__) > > > +#define NR_REGISTER_PARAMS 8 > > > +#elif defined(__powerpc__) > > > +#define NR_REGISTER_PARAMS 8 > > > +#elif defined(__sparc__) > > > +#define NR_REGISTER_PARAMS 6 > > > +#elif defined(__riscv) && __riscv_xlen == 64 > > > +#define NR_REGISTER_PARAMS 8 > > > +#elif defined(__arc__) > > > +#define NR_REGISTER_PARAMS 8 > > > +#else > > > +#define NR_REGISTER_PARAMS 0 > > > +#endif > > > + > > > +static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, > > > + struct conf_load *conf, int param_idx) > > > { > > > struct parameter *parm = tag__alloc(cu, sizeof(*parm)); > > > > > > if (parm != NULL) { > > > + struct location loc; > > > + > > > tag__init(&parm->tag, cu, die); > > > parm->name = attr_string(die, DW_AT_name, conf); > > > + > > > + /* Parameters which use DW_AT_abstract_origin to point at > > > + * the original parameter definition (with no name in the DIE) > > > + * are the result of later DWARF generation during compilation > > > + * so often better take into account if arguments were > > > + * optimized out. > > > + * > > > + * By checking that locations for parameters that are expected > > > + * to be passed as registers are actually passed as registers, > > > + * we can spot optimized-out parameters. > > > + */ > > > + if (param_idx < NR_REGISTER_PARAMS && !parm->name && > > > + attr_location(die, &loc.expr, &loc.exprlen) == 0 && > > > + loc.exprlen != 0) { > > > + Dwarf_Op *expr = loc.expr; > > > + > > > + switch (expr->atom) { > > > + case DW_OP_reg1 ... DW_OP_reg31: > > > + case DW_OP_breg0 ... 
DW_OP_breg31: > > > + break; > > > + default: > > > + parm->optimized = true; > > > + break; > > > + } > > > + } > > > > Hi Alan, > > > > I looked through the DWARF standard and found two relevant entries: > > > > > 4.1.4 > > > > > > If no location attribute is present in a variable entry representing > > > the definition of a variable (...), or if the location attribute is > > > present but has an empty location description (...), the variable is > > > assumed to exist in the source code but not in the executable program > > > (but see number 10, below). > > > > This paragraph implies that parameter name presence or absence is > > irrelevant, but I don't have any examples when parameter name is > > present for a removed parameter. > > > > > 4.1.10 > > > > > > A DW_AT_const_value attribute for an entry describing a variable or formal > > > parameter whose value is constant and not represented by an object in the > > > address space of the program, or an entry describing a named constant. (Note > > > that such an entry does not have a location attribute.) 
> > > > For this paragraph I have an example: > > > > $ cat test.c > > __attribute__((noinline)) > > static int f(int x, int y) { > > return x + y; > > } > > > > int main(int argc, char *argv[]) { > > return f(1, 2) + f(1, 3); > > } > > > > $ gcc --version | head -n1 > > gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 > > $ gcc -O2 -g -c test.c -o test.o > > > > The objdump shows that constant propagation removed the first > > parameter of the function `f`: > > > > $ llvm-objdump -d test.o > > > > test.o: file format elf64-x86-64 > > > > Disassembly of section .text: > > > > 0000000000000000 <f.constprop.0>: > > 0: 8d 47 01 leal 0x1(%rdi), %eax > > 3: c3 retq > > > > Disassembly of section .text.startup: > > > > 0000000000000000 <main>: > > 0: f3 0f 1e fa endbr64 > > 4: bf 02 00 00 00 movl $0x2, %edi > > 9: e8 00 00 00 00 callq 0xe <main+0xe> > > e: bf 03 00 00 00 movl $0x3, %edi > > 13: 89 c2 movl %eax, %edx > > 15: e8 00 00 00 00 callq 0x1a <main+0x1a> > > 1a: 01 d0 addl %edx, %eax > > 1c: c3 retq > > > > However, the information about this parameter is still present in the DWARF: > > > > $ llvm-dwarfdump test.o > > ... 
> > 0x000000c1: DW_TAG_subprogram > > DW_AT_name ("f") > > DW_AT_decl_file ("/home/eddy/work/tmp/test.c") > > DW_AT_decl_line (2) > > DW_AT_decl_column (0x0c) > > DW_AT_prototyped (true) > > DW_AT_type (0x000000a9 "int") > > DW_AT_inline (DW_INL_inlined) > > DW_AT_sibling (0x000000e1) > > > > 0x000000d0: DW_TAG_formal_parameter > > DW_AT_name ("x") > > DW_AT_decl_file ("/home/eddy/work/tmp/test.c") > > DW_AT_decl_line (2) > > DW_AT_decl_column (0x12) > > DW_AT_type (0x000000a9 "int") > > > > 0x000000d8: DW_TAG_formal_parameter > > DW_AT_name ("y") > > DW_AT_decl_file ("/home/eddy/work/tmp/test.c") > > DW_AT_decl_line (2) > > DW_AT_decl_column (0x19) > > DW_AT_type (0x000000a9 "int") > > > > 0x000000e0: NULL > > > > 0x000000e1: DW_TAG_subprogram > > DW_AT_abstract_origin (0x000000c1 "f") > > DW_AT_low_pc (0x0000000000000000) > > DW_AT_high_pc (0x0000000000000004) > > DW_AT_frame_base (DW_OP_call_frame_cfa) > > DW_AT_call_all_calls (true) > > > > 0x000000f8: DW_TAG_formal_parameter > > DW_AT_abstract_origin (0x000000d8 "y") > > DW_AT_location (DW_OP_reg5 RDI) > > > > 0x000000ff: DW_TAG_formal_parameter > > DW_AT_abstract_origin (0x000000d0 "x") > > DW_AT_const_value (0x01) > > > > 0x00000105: NULL > > > > When I ask pahole with this patch-set applied to generate BTF I see > > the following output: > > > > $ pahole --verbose --btf_encode_detached=test.btf test.o > > btf_encoder__new: 'test.o' doesn't have '.data..percpu' section > > Found 0 per-CPU variables! > > Found 2 functions! 
> > File test.o: > > [1] INT int size=4 nr_bits=32 encoding=SIGNED > > [2] PTR (anon) type_id=3 > > [3] PTR (anon) type_id=4 > > [4] INT char size=1 nr_bits=8 encoding=SIGNED > > [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv) > > [6] FUNC main type_id=5 > > matched function 'f' with 'f.constprop.0' > > added local function 'f' > > matched function 'f' with 'f.constprop.0' > > [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y) > > [8] FUNC f type_id=7 > > > > Meaning that function `f` had not been skipped. > > A trivial modification overcomes this: > > > > if (param_idx < NR_REGISTER_PARAMS && !parm->name) { > > if (attr_location(die, &loc.expr, &loc.exprlen) == 0 && > > loc.exprlen != 0) { > > Dwarf_Op *expr = loc.expr; > > > > switch (expr->atom) { > > case DW_OP_reg1 ... DW_OP_reg31: > > case DW_OP_breg0 ... DW_OP_breg31: > > break; > > default: > > parm->optimized = true; > > break; > > } > > } else if (dwarf_attr(die, DW_AT_const_value, &attr) != NULL) { > > parm->optimized = true; > > } > > > > With it pahole seem to work as intended (if I understand the intention correctly): > > > > $ pahole --verbose --btf_encode_detached=test.btf test.o > > btf_encoder__new: 'test.o' doesn't have '.data..percpu' section > > Found 0 per-CPU variables! > > Found 2 functions! > > File test.o: > > [1] INT int size=4 nr_bits=32 encoding=SIGNED > > [2] PTR (anon) type_id=3 > > [3] PTR (anon) type_id=4 > > [4] INT char size=1 nr_bits=8 encoding=SIGNED > > [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv) > > [6] FUNC main type_id=5 > > matched function 'f' with 'f.constprop.0', has optimized-out parameters > > added local function 'f', optimized-out params > > matched function 'f' with 'f.constprop.0', has optimized-out parameters > > skipping addition of 'f' due to optimized-out parameters > > > > wdyt? > > > > This is great, thanks Eduard! I can add an additional patch > for the else clause code above, attributing that to you in v2 if > you like? 
> > Alan > More on this topic. I tried the same example with clang; the DWARF generated by clang differs significantly. $ cat test.c __attribute__((noinline)) static int f(int x, int y) { return x + y; } int main(int argc, char *argv[]) { return f(1, 2) + f(1, 3); } $ clang --version | head -n1 clang version 16.0.0 (https://github.com/llvm/llvm-project.git 50d4a1f70e111cd41b1a94d95fd06b5691aa2643) $ clang -O2 -g -c test.c -o test.o llvm-objdump shows that the first parameter is still optimized out: $ llvm-objdump -d test.o test.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <main>: 0: 53 pushq %rbx 1: bf 02 00 00 00 movl $0x2, %edi 6: e8 15 00 00 00 callq 0x20 <f> b: 89 c3 movl %eax, %ebx d: bf 03 00 00 00 movl $0x3, %edi 12: e8 09 00 00 00 callq 0x20 <f> 17: 01 d8 addl %ebx, %eax 19: 5b popq %rbx 1a: c3 retq 1b: 0f 1f 44 00 00 nopl (%rax,%rax) 0000000000000020 <f>: 20: 8d 47 01 leal 0x1(%rdi), %eax 23: c3 retq And here is the DWARF; note that the formal parameter `x` has both `DW_AT_name` and `DW_AT_const_value` attributes: $ llvm-dwarfdump test.o ... 0x00000061: DW_TAG_subprogram DW_AT_low_pc (0x0000000000000020) DW_AT_high_pc (0x0000000000000024) DW_AT_frame_base (DW_OP_reg7 RSP) DW_AT_call_all_calls (true) DW_AT_name ("f") DW_AT_decl_file ("/home/eddy/work/tmp/test.c") DW_AT_decl_line (2) DW_AT_prototyped (true) DW_AT_calling_convention (DW_CC_nocall) DW_AT_type (0x00000085 "int") 0x00000071: DW_TAG_formal_parameter DW_AT_const_value (1) DW_AT_name ("x") DW_AT_decl_file ("/home/eddy/work/tmp/test.c") DW_AT_decl_line (2) DW_AT_type (0x00000085 "int") 0x0000007a: DW_TAG_formal_parameter DW_AT_location (DW_OP_reg5 RDI) DW_AT_name ("y") DW_AT_decl_file ("/home/eddy/work/tmp/test.c") DW_AT_decl_line (2) DW_AT_type (0x00000085 "int") 0x00000084: NULL ... 
Given this DWARF layout, pahole does not recognize `x` as optimized out: $ pahole --verbose --btf_encode_detached=test.btf test.o btf_encoder__new: 'test.o' doesn't have '.data..percpu' section Found 0 per-CPU variables! Found 2 functions! File test.o: [1] INT int size=4 nr_bits=32 encoding=SIGNED [2] PTR (anon) type_id=3 [3] PTR (anon) type_id=4 [4] INT char size=1 nr_bits=8 encoding=SIGNED [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv) [6] FUNC main type_id=5 [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y) [8] FUNC f type_id=7 The way I read paragraph 4.1.4 (mentioned before), the `DW_AT_name` attribute should not be used to identify whether a parameter is optimized out. Unfortunately, the trivial modification of the condition in `parameter__new()` to remove the `!parm->name` check is not sufficient. For some reason, parameters `x` and `y` are not visited in `ftype__recode_dwarf_types()`, and thus the `optimized_parms` field is not set. Thanks, Eduard > > Thanks, > > Eduard > > > > > > > > return parm; > > > @@ -1450,7 +1504,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die, > > > struct cu *cu, struct conf_load *conf, > > > int param_idx) > > > { > > > - struct parameter *parm = parameter__new(die, cu, conf); > > > + struct parameter *parm = parameter__new(die, cu, conf, param_idx); > > > > > > if (parm == NULL) > > > return NULL; > > > @@ -2209,6 +2263,10 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu) > > > } > > > pos->name = tag__parameter(dtype->tag)->name; > > > pos->tag.type = dtype->tag->type; > > > + if (pos->optimized) { > > > + tag__parameter(dtype->tag)->optimized = pos->optimized; > > > + type->optimized_parms = 1; > > > + } > > > continue; > > > } > > > > > > @@ -2219,6 +2277,20 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu) > > > } > > > pos->tag.type = dtype->small_id; > > > } > > > + /* if parameters were optimized out, set flag for the ftype this > > > + * function tag referred to via abstract 
origin. > > > + */ > > > + if (type->optimized_parms) { > > > + struct dwarf_tag *dtype = type->tag.priv; > > > + struct dwarf_tag *dftype; > > > + > > > + dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin); > > > + if (dftype && dftype->tag) { > > > + struct ftype *ftype = tag__ftype(dftype->tag); > > > + > > > + ftype->optimized_parms = 1; > > > + } > > > + } > > > } > > > > > > static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu) > > > diff --git a/dwarves.h b/dwarves.h > > > index 589588e..1ad1b3b 100644 > > > --- a/dwarves.h > > > +++ b/dwarves.h > > > @@ -808,6 +808,7 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu, > > > struct parameter { > > > struct tag tag; > > > const char *name; > > > + bool optimized; > > > }; > > > > > > static inline struct parameter *tag__parameter(const struct tag *tag) > > > @@ -827,7 +828,8 @@ struct ftype { > > > struct tag tag; > > > struct list_head parms; > > > uint16_t nr_parms; > > > - uint8_t unspec_parms; /* just one bit is needed */ > > > + uint8_t unspec_parms:1; /* just one bit is needed */ > > > + uint8_t optimized_parms:1; > > > }; > > > > > > static inline struct ftype *tag__ftype(const struct tag *tag) > >