Re: [PATCH dwarves 1/5] dwarves: help dwarf loader spot functions with optimized-out parameters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 25/01/2023 21:34, Eduard Zingerman wrote:
> On Wed, 2023-01-25 at 18:28 +0000, Alan Maguire wrote:
>> On 25/01/2023 17:47, Eduard Zingerman wrote:
>>> On Tue, 2023-01-24 at 13:45 +0000, Alan Maguire wrote:
>>>> Compilation generates DWARF at several stages, and often the
>>>> later DWARF representations more accurately represent optimizations
>>>> that have occurred during compilation.
>>>>
>>>> In particular, parameter representations can be spotted by their
>>>> abstract origin references to the original parameter, but they
>>>> often have more accurate location information.  In most cases,
>>>> the parameter locations will match calling conventions, and be
>>>> registers for the first 6 parameters on x86_64, first 8 on ARM64
>>>> etc.  If the parameter is not a register when it should be however,
>>>> it is likely passed via the stack or the compiler has used a
>>>> constant representation instead.
>>>>
>>>> This change adds a field to parameters and their associated
>>>> ftype to note if a parameter has been optimized out.  Having
>>>> this information allows us to skip such functions, as their
>>>> presence in CUs makes BTF encoding impossible.
>>>>
>>>> Signed-off-by: Alan Maguire <alan.maguire@xxxxxxxxxx>
>>>> ---
>>>>  dwarf_loader.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>>>  dwarves.h      |  4 +++-
>>>>  2 files changed, 77 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/dwarf_loader.c b/dwarf_loader.c
>>>> index 5a74035..0220f1d 100644
>>>> --- a/dwarf_loader.c
>>>> +++ b/dwarf_loader.c
>>>> @@ -992,13 +992,67 @@ static struct class_member *class_member__new(Dwarf_Die *die, struct cu *cu,
>>>>  	return member;
>>>>  }
>>>>  
>>>> -static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
>>>> +/* How many function parameters are passed via registers?  Used below in
>>>> + * determining if an argument has been optimized out or if it is simply
>>>> + * an argument > NR_REGISTER_PARAMS.  Setting NR_REGISTER_PARAMS to 0
>>>> + * allows unsupported architectures to skip tagging optimized-out
>>>> + * values.
>>>> + */
>>>> +#if defined(__x86_64__)
>>>> +#define NR_REGISTER_PARAMS      6
>>>> +#elif defined(__s390__)
>>>> +#define NR_REGISTER_PARAMS	5
>>>> +#elif defined(__aarch64__)
>>>> +#define NR_REGISTER_PARAMS      8
>>>> +#elif defined(__mips__)
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#elif defined(__powerpc__)
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#elif defined(__sparc__)
>>>> +#define NR_REGISTER_PARAMS	6
>>>> +#elif defined(__riscv) && __riscv_xlen == 64
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#elif defined(__arc__)
>>>> +#define NR_REGISTER_PARAMS	8
>>>> +#else
>>>> +#define NR_REGISTER_PARAMS      0
>>>> +#endif
>>>> +
>>>> +static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
>>>> +					struct conf_load *conf, int param_idx)
>>>>  {
>>>>  	struct parameter *parm = tag__alloc(cu, sizeof(*parm));
>>>>  
>>>>  	if (parm != NULL) {
>>>> +		struct location loc;
>>>> +
>>>>  		tag__init(&parm->tag, cu, die);
>>>>  		parm->name = attr_string(die, DW_AT_name, conf);
>>>> +
>>>> +		/* Parameters which use DW_AT_abstract_origin to point at
>>>> +		 * the original parameter definition (with no name in the DIE)
>>>> +		 * are the result of later DWARF generation during compilation
>>>> +		 * so they often better reflect whether arguments were
>>>> +		 * optimized out.
>>>> +		 *
>>>> +		 * By checking that locations for parameters that are expected
>>>> +		 * to be passed as registers are actually passed as registers,
>>>> +		 * we can spot optimized-out parameters.
>>>> +		 */
>>>> +		if (param_idx < NR_REGISTER_PARAMS && !parm->name &&
>>>> +		    attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
>>>> +		    loc.exprlen != 0) {
>>>> +			Dwarf_Op *expr = loc.expr;
>>>> +
>>>> +			switch (expr->atom) {
>>>> +			case DW_OP_reg1 ... DW_OP_reg31:
>>>> +			case DW_OP_breg0 ... DW_OP_breg31:
>>>> +				break;
>>>> +			default:
>>>> +				parm->optimized = true;
>>>> +				break;
>>>> +			}
>>>> +		}
>>>
>>> Hi Alan,
>>>
>>> I looked through the DWARF standard and found two relevant entries:
>>>
>>>> 4.1.4
>>>>
>>>> If no location attribute is present in a variable entry representing
>>>> the definition of a variable (...), or if the location attribute is
>>>> present but has an empty location description (...), the variable is
>>>> assumed to exist in the source code but not in the executable program
>>>> (but see number 10, below).
>>>
>>> This paragraph implies that the presence or absence of a parameter name
>>> is irrelevant, but I don't have any examples where a parameter name is
>>> present for a removed parameter.
>>>
>>>> 4.1.10
>>>>
>>>> A DW_AT_const_value attribute for an entry describing a variable or formal
>>>> parameter whose value is constant and not represented by an object in the
>>>> address space of the program, or an entry describing a named constant. (Note
>>>> that such an entry does not have a location attribute.)
>>>
>>> For this paragraph I have an example:
>>>
>>>     $ cat test.c
>>>     __attribute__((noinline))
>>>     static int f(int x, int y) {
>>>         return x + y;
>>>     }
>>>     
>>>     int main(int argc, char *argv[]) {
>>>         return f(1, 2) + f(1, 3);
>>>     }
>>>     
>>>     $ gcc --version | head -n1
>>>     gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
>>>     $ gcc -O2 -g -c test.c -o test.o
>>>     
>>> The objdump shows that constant propagation removed the first
>>> parameter of the function `f`:
>>>
>>>     $ llvm-objdump -d test.o 
>>>     
>>>     test.o:	file format elf64-x86-64
>>>     
>>>     Disassembly of section .text:
>>>     
>>>     0000000000000000 <f.constprop.0>:
>>>            0: 8d 47 01                     	leal	0x1(%rdi), %eax
>>>            3: c3                           	retq
>>>     
>>>     Disassembly of section .text.startup:
>>>     
>>>     0000000000000000 <main>:
>>>            0: f3 0f 1e fa                  	endbr64
>>>            4: bf 02 00 00 00               	movl	$0x2, %edi
>>>            9: e8 00 00 00 00               	callq	0xe <main+0xe>
>>>            e: bf 03 00 00 00               	movl	$0x3, %edi
>>>           13: 89 c2                        	movl	%eax, %edx
>>>           15: e8 00 00 00 00               	callq	0x1a <main+0x1a>
>>>           1a: 01 d0                        	addl	%edx, %eax
>>>           1c: c3                           	retq
>>>     
>>> However, the information about this parameter is still present in the DWARF:
>>>
>>>     $ llvm-dwarfdump test.o
>>>     ...
>>>     0x000000c1:   DW_TAG_subprogram
>>>                     DW_AT_name	("f")
>>>                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>>                     DW_AT_decl_line	(2)
>>>                     DW_AT_decl_column	(0x0c)
>>>                     DW_AT_prototyped	(true)
>>>                     DW_AT_type	(0x000000a9 "int")
>>>                     DW_AT_inline	(DW_INL_inlined)
>>>                     DW_AT_sibling	(0x000000e1)
>>>     
>>>     0x000000d0:     DW_TAG_formal_parameter
>>>                       DW_AT_name	("x")
>>>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>>                       DW_AT_decl_line	(2)
>>>                       DW_AT_decl_column	(0x12)
>>>                       DW_AT_type	(0x000000a9 "int")
>>>     
>>>     0x000000d8:     DW_TAG_formal_parameter
>>>                       DW_AT_name	("y")
>>>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>>>                       DW_AT_decl_line	(2)
>>>                       DW_AT_decl_column	(0x19)
>>>                       DW_AT_type	(0x000000a9 "int")
>>>     
>>>     0x000000e0:     NULL
>>>     
>>>     0x000000e1:   DW_TAG_subprogram
>>>                     DW_AT_abstract_origin	(0x000000c1 "f")
>>>                     DW_AT_low_pc	(0x0000000000000000)
>>>                     DW_AT_high_pc	(0x0000000000000004)
>>>                     DW_AT_frame_base	(DW_OP_call_frame_cfa)
>>>                     DW_AT_call_all_calls	(true)
>>>     
>>>     0x000000f8:     DW_TAG_formal_parameter
>>>                       DW_AT_abstract_origin	(0x000000d8 "y")
>>>                       DW_AT_location	(DW_OP_reg5 RDI)
>>>     
>>>     0x000000ff:     DW_TAG_formal_parameter
>>>                       DW_AT_abstract_origin	(0x000000d0 "x")
>>>                       DW_AT_const_value	(0x01)
>>>     
>>>     0x00000105:     NULL
>>>     
>>> When I ask pahole with this patch-set applied to generate BTF I see
>>> the following output:
>>>
>>>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>>>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>>>     Found 0 per-CPU variables!
>>>     Found 2 functions!
>>>     File test.o:
>>>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>>>     [2] PTR (anon) type_id=3
>>>     [3] PTR (anon) type_id=4
>>>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>>>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>>>     [6] FUNC main type_id=5
>>>     matched function 'f' with 'f.constprop.0'
>>>     added local function 'f'
>>>     matched function 'f' with 'f.constprop.0'
>>>     [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
>>>     [8] FUNC f type_id=7
>>>     
>>> Meaning that function `f` had not been skipped.
>>> A trivial modification overcomes this:
>>>
>>> 		if (param_idx < NR_REGISTER_PARAMS && !parm->name) {
>>> 			if (attr_location(die, &loc.expr, &loc.exprlen) == 0 &&
>>> 			    loc.exprlen != 0) {
>>> 				Dwarf_Op *expr = loc.expr;
>>>
>>> 				switch (expr->atom) {
>>> 				case DW_OP_reg1 ... DW_OP_reg31:
>>> 				case DW_OP_breg0 ... DW_OP_breg31:
>>> 					break;
>>> 				default:
>>> 					parm->optimized = true;
>>> 					break;
>>> 				}
>>> 			} else if (dwarf_attr(die, DW_AT_const_value, &attr) != NULL) {
>>> 					parm->optimized = true;
>>> 			}
>>>
>>> With it, pahole seems to work as intended (if I understand the intention correctly):
>>>
>>>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>>>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>>>     Found 0 per-CPU variables!
>>>     Found 2 functions!
>>>     File test.o:
>>>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>>>     [2] PTR (anon) type_id=3
>>>     [3] PTR (anon) type_id=4
>>>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>>>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>>>     [6] FUNC main type_id=5
>>>     matched function 'f' with 'f.constprop.0', has optimized-out parameters
>>>     added local function 'f', optimized-out params
>>>     matched function 'f' with 'f.constprop.0', has optimized-out parameters
>>>     skipping addition of 'f' due to optimized-out parameters
>>>
>>> wdyt?
>>>
>>
>> This is great, thanks Eduard! I can add an additional patch
>> for the else clause code above, attributing that to you in v2 if
>> you like?
>>
>> Alan
>>
> 
> More on this topic: I tried the same example but with clang, and the
> DWARF generated by clang differs significantly.
> 
>     $ cat test.c
>     __attribute__((noinline))
>     static int f(int x, int y) {
>         return x + y;
>     }
>     
>     int main(int argc, char *argv[]) {
>         return f(1, 2) + f(1, 3);
>     }
>     
>     $ clang --version | head -n1
>     clang version 16.0.0 (https://github.com/llvm/llvm-project.git 50d4a1f70e111cd41b1a94d95fd06b5691aa2643)
>     
>     $ clang -O2 -g -c test.c -o test.o
> 
> llvm-objdump shows that the first parameter is still optimized out:
> 
>     $ llvm-objdump -d test.o 
>     
>     test.o:	file format elf64-x86-64
>     
>     Disassembly of section .text:
>     
>     0000000000000000 <main>:
>            0: 53                           	pushq	%rbx
>            1: bf 02 00 00 00               	movl	$0x2, %edi
>            6: e8 15 00 00 00               	callq	0x20 <f>
>            b: 89 c3                        	movl	%eax, %ebx
>            d: bf 03 00 00 00               	movl	$0x3, %edi
>           12: e8 09 00 00 00               	callq	0x20 <f>
>           17: 01 d8                        	addl	%ebx, %eax
>           19: 5b                           	popq	%rbx
>           1a: c3                           	retq
>           1b: 0f 1f 44 00 00               	nopl	(%rax,%rax)
>     
>     0000000000000020 <f>:
>           20: 8d 47 01                     	leal	0x1(%rdi), %eax
>           23: c3                           	retq
> 
> And here is the DWARF; note that the formal parameter `x` has both
> `DW_AT_name` and `DW_AT_const_value` attributes:
> 
>     $ llvm-dwarfdump test.o
>     ...
>     0x00000061:   DW_TAG_subprogram
>                     DW_AT_low_pc	(0x0000000000000020)
>                     DW_AT_high_pc	(0x0000000000000024)
>                     DW_AT_frame_base	(DW_OP_reg7 RSP)
>                     DW_AT_call_all_calls	(true)
>                     DW_AT_name	("f")
>                     DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                     DW_AT_decl_line	(2)
>                     DW_AT_prototyped	(true)
>                     DW_AT_calling_convention	(DW_CC_nocall)
>                     DW_AT_type	(0x00000085 "int")
>     
>     0x00000071:     DW_TAG_formal_parameter
>                       DW_AT_const_value	(1)
>                       DW_AT_name	("x")
>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                       DW_AT_decl_line	(2)
>                       DW_AT_type	(0x00000085 "int")
>     
>     0x0000007a:     DW_TAG_formal_parameter
>                       DW_AT_location	(DW_OP_reg5 RDI)
>                       DW_AT_name	("y")
>                       DW_AT_decl_file	("/home/eddy/work/tmp/test.c")
>                       DW_AT_decl_line	(2)
>                       DW_AT_type	(0x00000085 "int")
>     
>     0x00000084:     NULL
>     ...
> 
> Given this DWARF layout, pahole does not recognize `x` as optimized out:
> 
>     $ pahole --verbose --btf_encode_detached=test.btf test.o
>     btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
>     Found 0 per-CPU variables!
>     Found 2 functions!
>     File test.o:
>     [1] INT int size=4 nr_bits=32 encoding=SIGNED
>     [2] PTR (anon) type_id=3
>     [3] PTR (anon) type_id=4
>     [4] INT char size=1 nr_bits=8 encoding=SIGNED
>     [5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
>     [6] FUNC main type_id=5
>     [7] FUNC_PROTO (anon) return=1 args=(1 x, 1 y)
>     [8] FUNC f type_id=7
> 
> The way I read paragraph 4.1.4 (mentioned before), the `DW_AT_name`
> attribute should not be used to identify whether a parameter is optimized
> out. Unfortunately, a trivial modification of the condition in
> `parameter__new()` to remove the `!parm->name` check is not
> sufficient. For some reason, parameters `x` and `y` are not visited in
> `ftype__recode_dwarf_types()` and thus the `optimized_parms` field is not set.
> 

Thanks for this - I tried it, and we spot the optimization once we update
die__create_new_parameter() as follows:

diff --git a/dwarf_loader.c b/dwarf_loader.c
index f96b6ff..605ad45 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -1529,6 +1530,8 @@ static struct tag *die__create_new_parameter(Dwarf_Die *di
 
        if (ftype != NULL) {
                ftype__add_parameter(ftype, parm);
+               if (parm->optimized)
+                       ftype->optimized_parms = 1;
                if (param_idx >= 0) {
                        if (add_child_llvm_annotations(die, param_idx, conf, &(t
                                return NULL;


With that change, I see:

$ pahole --verbose --btf_encode_detached=test.btf test.o
btf_encoder__new: 'test.o' doesn't have '.data..percpu' section
Found 0 per-CPU variables!
Found 2 functions!
File test.o:
[1] INT int size=4 nr_bits=32 encoding=SIGNED
[2] PTR (anon) type_id=3
[3] PTR (anon) type_id=4
[4] INT char size=1 nr_bits=8 encoding=SIGNED
[5] FUNC_PROTO (anon) return=1 args=(1 argc, 2 argv)
[6] FUNC main type_id=5
added local function 'f', optimized-out params
skipping addition of 'f' due to optimized-out parameters

Thanks!

Alan

> Thanks,
> Eduard
> 
> 
> 
>>> Thanks,
>>> Eduard
>>>
>>>>  
>>>>  	return parm;
>>>> @@ -1450,7 +1504,7 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die,
>>>>  					     struct cu *cu, struct conf_load *conf,
>>>>  					     int param_idx)
>>>>  {
>>>> -	struct parameter *parm = parameter__new(die, cu, conf);
>>>> +	struct parameter *parm = parameter__new(die, cu, conf, param_idx);
>>>>  
>>>>  	if (parm == NULL)
>>>>  		return NULL;
>>>> @@ -2209,6 +2263,10 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>>>>  			}
>>>>  			pos->name = tag__parameter(dtype->tag)->name;
>>>>  			pos->tag.type = dtype->tag->type;
>>>> +			if (pos->optimized) {
>>>> +				tag__parameter(dtype->tag)->optimized = pos->optimized;
>>>> +				type->optimized_parms = 1;
>>>> +			}
>>>>  			continue;
>>>>  		}
>>>>  
>>>> @@ -2219,6 +2277,20 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
>>>>  		}
>>>>  		pos->tag.type = dtype->small_id;
>>>>  	}
>>>> +	/* if parameters were optimized out, set flag for the ftype this
>>>> +	 * function tag referred to via abstract origin.
>>>> +	 */
>>>> +	if (type->optimized_parms) {
>>>> +		struct dwarf_tag *dtype = type->tag.priv;
>>>> +		struct dwarf_tag *dftype;
>>>> +
>>>> +		dftype = dwarf_cu__find_tag_by_ref(dcu, &dtype->abstract_origin);
>>>> +		if (dftype && dftype->tag) {
>>>> +			struct ftype *ftype = tag__ftype(dftype->tag);
>>>> +
>>>> +			ftype->optimized_parms = 1;
>>>> +		}
>>>> +	}
>>>>  }
>>>>  
>>>>  static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
>>>> diff --git a/dwarves.h b/dwarves.h
>>>> index 589588e..1ad1b3b 100644
>>>> --- a/dwarves.h
>>>> +++ b/dwarves.h
>>>> @@ -808,6 +808,7 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
>>>>  struct parameter {
>>>>  	struct tag tag;
>>>>  	const char *name;
>>>> +	bool optimized;
>>>>  };
>>>>  
>>>>  static inline struct parameter *tag__parameter(const struct tag *tag)
>>>> @@ -827,7 +828,8 @@ struct ftype {
>>>>  	struct tag	 tag;
>>>>  	struct list_head parms;
>>>>  	uint16_t	 nr_parms;
>>>> -	uint8_t		 unspec_parms; /* just one bit is needed */
>>>> +	uint8_t		 unspec_parms:1; /* just one bit is needed */
>>>> +	uint8_t		 optimized_parms:1;
>>>>  };
>>>>  
>>>>  static inline struct ftype *tag__ftype(const struct tag *tag)
>>>
> 



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux