Re: double argument casting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,

While working on this casting issue I also noticed something I don't
quite understand when code is inlined.

Coming back to the test.c as previously defined in
http://gcc.gnu.org/ml/gcc-help/2010-10/msg00389.html, when it is
compiled using GCC 4.5.1 for SPARC V8 with the following command:

/opt/sparc-elf-4.5.1/bin/sparc-elf-gcc test.c -o test.elf -mcpu=v8
-msoft-float -e main -Os -Wl,--gc-sections

the dump is

000101ac <somme>:
   101ac:    85 2a 60 10     sll  %o1, 0x10, %g2
   101b0:    82 10 20 00     clr  %g1
   101b4:    10 80 00 03     b  101c0 <somme+0x14>
   101b8:    85 38 a0 10     sra  %g2, 0x10, %g2
   101bc:    82 00 60 01     inc  %g1
   101c0:    80 a0 40 02     cmp  %g1, %g2
   101c4:    26 bf ff fe     bl,a   101bc <somme+0x10>
   101c8:    90 02 00 08     add  %o0, %o0, %o0
   101cc:    91 2a 20 18     sll  %o0, 0x18, %o0
   101d0:    91 3a 20 18     sra  %o0, 0x18, %o0
   101d4:    81 c3 e0 08     retl
   101d8:    90 02 40 08     add  %o1, %o0, %o0

000101dc <somme2>:
   101dc:    84 10 00 09     mov  %o1, %g2
   101e0:    82 10 20 00     clr  %g1
   101e4:    87 28 a0 10     sll  %g2, 0x10, %g3
   101e8:    10 80 00 03     b  101f4 <somme2+0x18>
   101ec:    87 38 e0 10     sra  %g3, 0x10, %g3
   101f0:    82 00 60 01     inc  %g1
   101f4:    80 a0 40 03     cmp  %g1, %g3
   101f8:    26 bf ff fe     bl,a   101f0 <somme2+0x14>
   101fc:    90 02 00 02     add  %o0, %g2, %o0
   10200:    92 02 40 09     add  %o1, %o1, %o1
   10204:    91 2a 20 18     sll  %o0, 0x18, %o0
   10208:    91 3a 20 18     sra  %o0, 0x18, %o0
   1020c:    81 c3 e0 08     retl
   10210:    90 02 40 08     add  %o1, %o0, %o0

00010214 <main>:
   10214:    9d e3 bf 98     save  %sp, -104, %sp
   10218:    82 10 20 01     mov  1, %g1
   1021c:    c2 37 bf fc     sth  %g1, [ %fp + -4 ]
   10220:    c2 2f bf ff     stb  %g1, [ %fp + -1 ]
   10224:    c2 2f bf fe     stb  %g1, [ %fp + -2 ]
   10228:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   1022c:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
   10230:    91 2a 20 18     sll  %o0, 0x18, %o0
   10234:    93 2a 60 10     sll  %o1, 0x10, %o1
   10238:    91 3a 20 18     sra  %o0, 0x18, %o0
   1023c:    7f ff ff dc     call  101ac <somme>
   10240:    93 3a 60 10     sra  %o1, 0x10, %o1
   10244:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10248:    d0 0f bf fe     ldub  [ %fp + -2 ], %o0
   1024c:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
   10250:    91 2a 20 18     sll  %o0, 0x18, %o0
   10254:    93 2a 60 10     sll  %o1, 0x10, %o1
   10258:    91 3a 20 18     sra  %o0, 0x18, %o0
   1025c:    7f ff ff d4     call  101ac <somme>
   10260:    93 3a 60 10     sra  %o1, 0x10, %o1
   10264:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10268:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   1026c:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   10270:    91 2a 20 18     sll  %o0, 0x18, %o0
   10274:    93 2a 60 18     sll  %o1, 0x18, %o1
   10278:    91 3a 20 18     sra  %o0, 0x18, %o0
   1027c:    7f ff ff cc     call  101ac <somme>
   10280:    93 3a 60 18     sra  %o1, 0x18, %o1
   10284:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10288:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   1028c:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
   10290:    91 2a 20 18     sll  %o0, 0x18, %o0
   10294:    93 2a 60 10     sll  %o1, 0x10, %o1
   10298:    91 3a 20 18     sra  %o0, 0x18, %o0
   1029c:    7f ff ff d0     call  101dc <somme2>
   102a0:    93 3a 60 10     sra  %o1, 0x10, %o1
   102a4:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   102a8:    d0 0f bf fe     ldub  [ %fp + -2 ], %o0
   102ac:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
   102b0:    91 2a 20 18     sll  %o0, 0x18, %o0
   102b4:    93 2a 60 10     sll  %o1, 0x10, %o1
   102b8:    91 3a 20 18     sra  %o0, 0x18, %o0
   102bc:    7f ff ff c8     call  101dc <somme2>
   102c0:    93 3a 60 10     sra  %o1, 0x10, %o1
   102c4:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   102c8:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   102cc:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   102d0:    91 2a 20 18     sll  %o0, 0x18, %o0
   102d4:    93 2a 60 18     sll  %o1, 0x18, %o1
   102d8:    91 3a 20 18     sra  %o0, 0x18, %o0
   102dc:    7f ff ff c0     call  101dc <somme2>
   102e0:    93 3a 60 18     sra  %o1, 0x18, %o1
   102e4:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   102e8:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   102ec:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   102f0:    91 2a 20 18     sll  %o0, 0x18, %o0
   102f4:    93 2a 60 18     sll  %o1, 0x18, %o1
   102f8:    91 3a 20 18     sra  %o0, 0x18, %o0
   102fc:    7f ff ff b8     call  101dc <somme2>
   10300:    93 3a 60 17     sra  %o1, 0x17, %o1
   10304:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10308:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   1030c:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   10310:    93 2a 60 18     sll  %o1, 0x18, %o1
   10314:    93 3a 60 18     sra  %o1, 0x18, %o1
   10318:    82 02 40 09     add  %o1, %o1, %g1
   1031c:    91 2a 20 18     sll  %o0, 0x18, %o0
   10320:    92 00 40 09     add  %g1, %o1, %o1
   10324:    91 3a 20 18     sra  %o0, 0x18, %o0
   10328:    93 2a 60 10     sll  %o1, 0x10, %o1
   1032c:    7f ff ff ac     call  101dc <somme2>
   10330:    93 3a 60 10     sra  %o1, 0x10, %o1
   10334:    b0 10 20 00     clr  %i0
   10338:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   1033c:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   10340:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   10344:    91 2a 20 18     sll  %o0, 0x18, %o0
   10348:    93 2a 60 18     sll  %o1, 0x18, %o1
   1034c:    91 3a 20 18     sra  %o0, 0x18, %o0
   10350:    7f ff ff a3     call  101dc <somme2>
   10354:    93 3a 60 16     sra  %o1, 0x16, %o1
   10358:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   1035c:    81 c7 e0 08     ret
   10360:    81 e8 00 00     restore



whereas if the options: "-combine -fwhole-program" are added to the same
command, the resulting code is:

000101ac <somme2>:
   101ac:    82 10 20 00     clr  %g1
   101b0:    86 10 00 09     mov  %o1, %g3
   101b4:    10 80 00 03     b  101c0 <somme2+0x14>
   101b8:    84 10 00 09     mov  %o1, %g2
   101bc:    82 00 60 01     inc  %g1
   101c0:    80 a0 40 03     cmp  %g1, %g3
   101c4:    26 bf ff fe     bl,a   101bc <somme2+0x10>
   101c8:    90 02 00 02     add  %o0, %g2, %o0
   101cc:    92 02 40 09     add  %o1, %o1, %o1
   101d0:    91 2a 20 18     sll  %o0, 0x18, %o0
   101d4:    91 3a 20 18     sra  %o0, 0x18, %o0
   101d8:    81 c3 e0 08     retl
   101dc:    90 02 40 08     add  %o1, %o0, %o0

000101e0 <main>:
   101e0:    9d e3 bf 98     save  %sp, -104, %sp
   101e4:    82 10 20 01     mov  1, %g1
   101e8:    c2 37 bf fc     sth  %g1, [ %fp + -4 ]
   101ec:    c2 2f bf ff     stb  %g1, [ %fp + -1 ]
   101f0:    c2 2f bf fe     stb  %g1, [ %fp + -2 ]
   101f4:    84 10 20 00     clr  %g2
   101f8:    c2 0f bf ff     ldub  [ %fp + -1 ], %g1
   101fc:    c6 17 bf fc     lduh  [ %fp + -4 ], %g3
   10200:    89 28 e0 10     sll  %g3, 0x10, %g4
   10204:    10 80 00 03     b  10210 <main+0x30>
   10208:    89 39 20 10     sra  %g4, 0x10, %g4
   1020c:    84 00 a0 01     inc  %g2
   10210:    80 a0 80 04     cmp  %g2, %g4
   10214:    26 bf ff fe     bl,a   1020c <main+0x2c>
   10218:    82 00 40 01     add  %g1, %g1, %g1
   1021c:    83 28 60 18     sll  %g1, 0x18, %g1
   10220:    83 38 60 18     sra  %g1, 0x18, %g1
   10224:    86 00 40 03     add  %g1, %g3, %g3
   10228:    84 10 20 00     clr  %g2
   1022c:    c6 37 bf fc     sth  %g3, [ %fp + -4 ]
   10230:    c2 0f bf fe     ldub  [ %fp + -2 ], %g1
   10234:    c6 17 bf fc     lduh  [ %fp + -4 ], %g3
   10238:    89 28 e0 10     sll  %g3, 0x10, %g4
   1023c:    10 80 00 03     b  10248 <main+0x68>
   10240:    89 39 20 10     sra  %g4, 0x10, %g4
   10244:    84 00 a0 01     inc  %g2
   10248:    80 a0 80 04     cmp  %g2, %g4
   1024c:    26 bf ff fe     bl,a   10244 <main+0x64>
   10250:    82 00 40 01     add  %g1, %g1, %g1
   10254:    83 28 60 18     sll  %g1, 0x18, %g1
   10258:    83 38 60 18     sra  %g1, 0x18, %g1
   1025c:    86 00 40 03     add  %g1, %g3, %g3
   10260:    84 10 20 00     clr  %g2
   10264:    c6 37 bf fc     sth  %g3, [ %fp + -4 ]
   10268:    c2 0f bf ff     ldub  [ %fp + -1 ], %g1
   1026c:    c6 0f bf fe     ldub  [ %fp + -2 ], %g3
   10270:    87 28 e0 18     sll  %g3, 0x18, %g3
   10274:    87 38 e0 18     sra  %g3, 0x18, %g3
   10278:    10 80 00 03     b  10284 <main+0xa4>
   1027c:    88 10 00 03     mov  %g3, %g4
   10280:    84 00 a0 01     inc  %g2
   10284:    80 a0 80 04     cmp  %g2, %g4
   10288:    26 bf ff fe     bl,a   10280 <main+0xa0>
   1028c:    82 00 40 01     add  %g1, %g1, %g1
   10290:    83 28 60 18     sll  %g1, 0x18, %g1
   10294:    83 38 60 18     sra  %g1, 0x18, %g1
   10298:    86 00 40 03     add  %g1, %g3, %g3
   1029c:    c6 37 bf fc     sth  %g3, [ %fp + -4 ]
   102a0:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   102a4:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
   102a8:    91 2a 20 18     sll  %o0, 0x18, %o0
   102ac:    93 2a 60 10     sll  %o1, 0x10, %o1
   102b0:    91 3a 20 18     sra  %o0, 0x18, %o0
   102b4:    7f ff ff be     call  101ac <somme2>
   102b8:    93 3a 60 10     sra  %o1, 0x10, %o1
   102bc:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   102c0:    d0 0f bf fe     ldub  [ %fp + -2 ], %o0
   102c4:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
   102c8:    91 2a 20 18     sll  %o0, 0x18, %o0
   102cc:    93 2a 60 10     sll  %o1, 0x10, %o1
   102d0:    91 3a 20 18     sra  %o0, 0x18, %o0
   102d4:    7f ff ff b6     call  101ac <somme2>
   102d8:    93 3a 60 10     sra  %o1, 0x10, %o1
   102dc:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   102e0:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   102e4:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   102e8:    91 2a 20 18     sll  %o0, 0x18, %o0
   102ec:    93 2a 60 18     sll  %o1, 0x18, %o1
   102f0:    91 3a 20 18     sra  %o0, 0x18, %o0
   102f4:    7f ff ff ae     call  101ac <somme2>
   102f8:    93 3a 60 18     sra  %o1, 0x18, %o1
   102fc:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10300:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   10304:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   10308:    91 2a 20 18     sll  %o0, 0x18, %o0
   1030c:    93 2a 60 18     sll  %o1, 0x18, %o1
   10310:    91 3a 20 18     sra  %o0, 0x18, %o0
   10314:    7f ff ff a6     call  101ac <somme2>
   10318:    93 3a 60 17     sra  %o1, 0x17, %o1
   1031c:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10320:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   10324:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   10328:    93 2a 60 18     sll  %o1, 0x18, %o1
   1032c:    93 3a 60 18     sra  %o1, 0x18, %o1
   10330:    82 02 40 09     add  %o1, %o1, %g1
   10334:    91 2a 20 18     sll  %o0, 0x18, %o0
   10338:    92 00 40 09     add  %g1, %o1, %o1
   1033c:    91 3a 20 18     sra  %o0, 0x18, %o0
   10340:    93 2a 60 10     sll  %o1, 0x10, %o1
   10344:    7f ff ff 9a     call  101ac <somme2>
   10348:    93 3a 60 10     sra  %o1, 0x10, %o1
   1034c:    b0 10 20 00     clr  %i0
   10350:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10354:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
   10358:    d2 0f bf fe     ldub  [ %fp + -2 ], %o1
   1035c:    91 2a 20 18     sll  %o0, 0x18, %o0
   10360:    93 2a 60 18     sll  %o1, 0x18, %o1
   10364:    91 3a 20 18     sra  %o0, 0x18, %o0
   10368:    7f ff ff 91     call  101ac <somme2>
   1036c:    93 3a 60 16     sra  %o1, 0x16, %o1
   10370:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
   10374:    81 c7 e0 08     ret
   10378:    81 e8 00 00     restore

where it can be observed that the function "somme" has been inlined.
However, the code is not smaller:

- size WITHOUT the options "-combine -fwhole-program":
(0x10360-0x101ac)/4+1=110 instructions (NO INLINING WAS DONE)
- size WITH the options "-combine -fwhole-program":
(0x10378-0x101ac)/4+1=116 instructions (INLINING WAS DONE)

So, given that the resulting code is actually bigger after inlining (even
though -Os was requested), I don't understand why the inlining is performed.
Do you guys have any idea why this happens?

Thanks in advance,


Best regards,


Jorge



Eric Botcazou wrote:
>> In that case your patch modification seems to work better, since it
>> appears to remove the cast from the callee and do the sign extension in
>> the caller:
>>
>> (gcc4.5.1 with your patch applied)
>>
>> 0001024c <somme>:
>>    1024c:    10 80 00 04     b  1025c <somme+0x10>
>>    10250:    82 10 20 00     clr  %g1
>>    10254:    82 00 60 01     inc  %g1
>>    10258:    91 3a 20 18     sra  %o0, 0x18, %o0
>>    1025c:    80 a0 40 09     cmp  %g1, %o1
>>    10260:    26 bf ff fd     bl,a   10254 <somme+0x8>
>>    10264:    91 2a 20 19     sll  %o0, 0x19, %o0
>>    10268:    90 02 00 09     add  %o0, %o1, %o0
>>    1026c:    91 2a 20 10     sll  %o0, 0x10, %o0
>>    10270:    81 c3 e0 08     retl
>>    10274:    91 3a 20 10     sra  %o0, 0x10, %o0
>>
>>   000102ac <main>:
>>    102ac:    9d e3 bf 98     save  %sp, -104, %sp
>>    102b0:    82 10 20 01     mov  1, %g1
>>    102b4:    c2 37 bf fc     sth  %g1, [ %fp + -4 ]
>>    102b8:    c2 2f bf ff     stb  %g1, [ %fp + -1 ]
>>    102bc:    c2 2f bf fe     stb  %g1, [ %fp + -2 ]
>>    102c0:    d0 0f bf ff     ldub  [ %fp + -1 ], %o0
>>    102c4:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
>>    102c8:    91 2a 20 18     sll  %o0, 0x18, %o0
>>    102cc:    93 2a 60 10     sll  %o1, 0x10, %o1
>>    102d0:    91 3a 20 18     sra  %o0, 0x18, %o0
>>    102d4:    7f ff ff de     call  1024c <somme>
>>    102d8:    93 3a 60 10     sra  %o1, 0x10, %o1
>>    102dc:    d0 37 bf fc     sth  %o0, [ %fp + -4 ]
>>    102e0:    d0 0f bf fe     ldub  [ %fp + -2 ], %o0
>>    102e4:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
>>    102e8:    91 2a 20 18     sll  %o0, 0x18, %o0
>>    102ec:    93 2a 60 10     sll  %o1, 0x10, %o1
>>    102f0:    91 3a 20 18     sra  %o0, 0x18, %o0
>>    102f4:    7f ff ff d6     call  1024c <somme>
>> ...
>>
>> isn't that right?
>>     
>
> Yes, the code is now correct, but still sub-optimal as the sequence:
>
>   
>>    102c4:    d2 17 bf fc     lduh  [ %fp + -4 ], %o1
>>     
>
>   
>>    102cc:    93 2a 60 10     sll  %o1, 0x10, %o1
>>     
>
>   
>>    102d8:    93 3a 60 10     sra  %o1, 0x10, %o1
>>     
>
> could be reduced to just:
>
>   ldsh  [ %fp + -4 ], %o1
>
> but this requires more work in the back-end.  This is actually the bulk of the
> work to be done, as the extension in the callee that TARGET_PROMOTE_PROTOTYPES
> triggers very likely has only a marginal effect overall.
>
> Note that x86, MIPS, HP-PA, m68k, IA-64 and many others have the same setting.
>
>   





[Index of Archives]     [Linux C Programming]     [Linux Kernel]     [eCos]     [Fedora Development]     [Fedora Announce]     [Autoconf]     [The DWARVES Debugging Tools]     [Yosemite Campsites]     [Yosemite News]     [Linux GCC]

  Powered by Linux