Hello, While working on this casting issue I also noticed something I don't quite understand when code is inlined. Coming back to the test.c as previously defined in http://gcc.gnu.org/ml/gcc-help/2010-10/msg00389.html, when it is compiled using GCC 4.5.1 for SPARC V8 with the following command: /opt/sparc-elf-4.5.1/bin/sparc-elf-gcc test.c -o test.elf -mcpu=v8 -msoft-float -e main -Os -Wl,--gc-sections the dump is 000101ac <somme>: 101ac: 85 2a 60 10 sll %o1, 0x10, %g2 101b0: 82 10 20 00 clr %g1 101b4: 10 80 00 03 b 101c0 <somme+0x14> 101b8: 85 38 a0 10 sra %g2, 0x10, %g2 101bc: 82 00 60 01 inc %g1 101c0: 80 a0 40 02 cmp %g1, %g2 101c4: 26 bf ff fe bl,a 101bc <somme+0x10> 101c8: 90 02 00 08 add %o0, %o0, %o0 101cc: 91 2a 20 18 sll %o0, 0x18, %o0 101d0: 91 3a 20 18 sra %o0, 0x18, %o0 101d4: 81 c3 e0 08 retl 101d8: 90 02 40 08 add %o1, %o0, %o0 000101dc <somme2>: 101dc: 84 10 00 09 mov %o1, %g2 101e0: 82 10 20 00 clr %g1 101e4: 87 28 a0 10 sll %g2, 0x10, %g3 101e8: 10 80 00 03 b 101f4 <somme2+0x18> 101ec: 87 38 e0 10 sra %g3, 0x10, %g3 101f0: 82 00 60 01 inc %g1 101f4: 80 a0 40 03 cmp %g1, %g3 101f8: 26 bf ff fe bl,a 101f0 <somme2+0x14> 101fc: 90 02 00 02 add %o0, %g2, %o0 10200: 92 02 40 09 add %o1, %o1, %o1 10204: 91 2a 20 18 sll %o0, 0x18, %o0 10208: 91 3a 20 18 sra %o0, 0x18, %o0 1020c: 81 c3 e0 08 retl 10210: 90 02 40 08 add %o1, %o0, %o0 00010214 <main>: 10214: 9d e3 bf 98 save %sp, -104, %sp 10218: 82 10 20 01 mov 1, %g1 1021c: c2 37 bf fc sth %g1, [ %fp + -4 ] 10220: c2 2f bf ff stb %g1, [ %fp + -1 ] 10224: c2 2f bf fe stb %g1, [ %fp + -2 ] 10228: d0 0f bf ff ldub [ %fp + -1 ], %o0 1022c: d2 17 bf fc lduh [ %fp + -4 ], %o1 10230: 91 2a 20 18 sll %o0, 0x18, %o0 10234: 93 2a 60 10 sll %o1, 0x10, %o1 10238: 91 3a 20 18 sra %o0, 0x18, %o0 1023c: 7f ff ff dc call 101ac <somme> 10240: 93 3a 60 10 sra %o1, 0x10, %o1 10244: d0 37 bf fc sth %o0, [ %fp + -4 ] 10248: d0 0f bf fe ldub [ %fp + -2 ], %o0 1024c: d2 17 bf fc lduh [ %fp + -4 ], %o1 10250: 91 2a 20 18 sll 
%o0, 0x18, %o0 10254: 93 2a 60 10 sll %o1, 0x10, %o1 10258: 91 3a 20 18 sra %o0, 0x18, %o0 1025c: 7f ff ff d4 call 101ac <somme> 10260: 93 3a 60 10 sra %o1, 0x10, %o1 10264: d0 37 bf fc sth %o0, [ %fp + -4 ] 10268: d0 0f bf ff ldub [ %fp + -1 ], %o0 1026c: d2 0f bf fe ldub [ %fp + -2 ], %o1 10270: 91 2a 20 18 sll %o0, 0x18, %o0 10274: 93 2a 60 18 sll %o1, 0x18, %o1 10278: 91 3a 20 18 sra %o0, 0x18, %o0 1027c: 7f ff ff cc call 101ac <somme> 10280: 93 3a 60 18 sra %o1, 0x18, %o1 10284: d0 37 bf fc sth %o0, [ %fp + -4 ] 10288: d0 0f bf ff ldub [ %fp + -1 ], %o0 1028c: d2 17 bf fc lduh [ %fp + -4 ], %o1 10290: 91 2a 20 18 sll %o0, 0x18, %o0 10294: 93 2a 60 10 sll %o1, 0x10, %o1 10298: 91 3a 20 18 sra %o0, 0x18, %o0 1029c: 7f ff ff d0 call 101dc <somme2> 102a0: 93 3a 60 10 sra %o1, 0x10, %o1 102a4: d0 37 bf fc sth %o0, [ %fp + -4 ] 102a8: d0 0f bf fe ldub [ %fp + -2 ], %o0 102ac: d2 17 bf fc lduh [ %fp + -4 ], %o1 102b0: 91 2a 20 18 sll %o0, 0x18, %o0 102b4: 93 2a 60 10 sll %o1, 0x10, %o1 102b8: 91 3a 20 18 sra %o0, 0x18, %o0 102bc: 7f ff ff c8 call 101dc <somme2> 102c0: 93 3a 60 10 sra %o1, 0x10, %o1 102c4: d0 37 bf fc sth %o0, [ %fp + -4 ] 102c8: d0 0f bf ff ldub [ %fp + -1 ], %o0 102cc: d2 0f bf fe ldub [ %fp + -2 ], %o1 102d0: 91 2a 20 18 sll %o0, 0x18, %o0 102d4: 93 2a 60 18 sll %o1, 0x18, %o1 102d8: 91 3a 20 18 sra %o0, 0x18, %o0 102dc: 7f ff ff c0 call 101dc <somme2> 102e0: 93 3a 60 18 sra %o1, 0x18, %o1 102e4: d0 37 bf fc sth %o0, [ %fp + -4 ] 102e8: d0 0f bf ff ldub [ %fp + -1 ], %o0 102ec: d2 0f bf fe ldub [ %fp + -2 ], %o1 102f0: 91 2a 20 18 sll %o0, 0x18, %o0 102f4: 93 2a 60 18 sll %o1, 0x18, %o1 102f8: 91 3a 20 18 sra %o0, 0x18, %o0 102fc: 7f ff ff b8 call 101dc <somme2> 10300: 93 3a 60 17 sra %o1, 0x17, %o1 10304: d0 37 bf fc sth %o0, [ %fp + -4 ] 10308: d0 0f bf ff ldub [ %fp + -1 ], %o0 1030c: d2 0f bf fe ldub [ %fp + -2 ], %o1 10310: 93 2a 60 18 sll %o1, 0x18, %o1 10314: 93 3a 60 18 sra %o1, 0x18, %o1 10318: 82 02 40 09 add %o1, %o1, %g1 1031c: 91 2a 20 
18 sll %o0, 0x18, %o0 10320: 92 00 40 09 add %g1, %o1, %o1 10324: 91 3a 20 18 sra %o0, 0x18, %o0 10328: 93 2a 60 10 sll %o1, 0x10, %o1 1032c: 7f ff ff ac call 101dc <somme2> 10330: 93 3a 60 10 sra %o1, 0x10, %o1 10334: b0 10 20 00 clr %i0 10338: d0 37 bf fc sth %o0, [ %fp + -4 ] 1033c: d0 0f bf ff ldub [ %fp + -1 ], %o0 10340: d2 0f bf fe ldub [ %fp + -2 ], %o1 10344: 91 2a 20 18 sll %o0, 0x18, %o0 10348: 93 2a 60 18 sll %o1, 0x18, %o1 1034c: 91 3a 20 18 sra %o0, 0x18, %o0 10350: 7f ff ff a3 call 101dc <somme2> 10354: 93 3a 60 16 sra %o1, 0x16, %o1 10358: d0 37 bf fc sth %o0, [ %fp + -4 ] 1035c: 81 c7 e0 08 ret 10360: 81 e8 00 00 restore whereas if the options: "-combine -fwhole-program" are added to the same command, the resulting code is: 000101ac <somme2>: 101ac: 82 10 20 00 clr %g1 101b0: 86 10 00 09 mov %o1, %g3 101b4: 10 80 00 03 b 101c0 <somme2+0x14> 101b8: 84 10 00 09 mov %o1, %g2 101bc: 82 00 60 01 inc %g1 101c0: 80 a0 40 03 cmp %g1, %g3 101c4: 26 bf ff fe bl,a 101bc <somme2+0x10> 101c8: 90 02 00 02 add %o0, %g2, %o0 101cc: 92 02 40 09 add %o1, %o1, %o1 101d0: 91 2a 20 18 sll %o0, 0x18, %o0 101d4: 91 3a 20 18 sra %o0, 0x18, %o0 101d8: 81 c3 e0 08 retl 101dc: 90 02 40 08 add %o1, %o0, %o0 000101e0 <main>: 101e0: 9d e3 bf 98 save %sp, -104, %sp 101e4: 82 10 20 01 mov 1, %g1 101e8: c2 37 bf fc sth %g1, [ %fp + -4 ] 101ec: c2 2f bf ff stb %g1, [ %fp + -1 ] 101f0: c2 2f bf fe stb %g1, [ %fp + -2 ] 101f4: 84 10 20 00 clr %g2 101f8: c2 0f bf ff ldub [ %fp + -1 ], %g1 101fc: c6 17 bf fc lduh [ %fp + -4 ], %g3 10200: 89 28 e0 10 sll %g3, 0x10, %g4 10204: 10 80 00 03 b 10210 <main+0x30> 10208: 89 39 20 10 sra %g4, 0x10, %g4 1020c: 84 00 a0 01 inc %g2 10210: 80 a0 80 04 cmp %g2, %g4 10214: 26 bf ff fe bl,a 1020c <main+0x2c> 10218: 82 00 40 01 add %g1, %g1, %g1 1021c: 83 28 60 18 sll %g1, 0x18, %g1 10220: 83 38 60 18 sra %g1, 0x18, %g1 10224: 86 00 40 03 add %g1, %g3, %g3 10228: 84 10 20 00 clr %g2 1022c: c6 37 bf fc sth %g3, [ %fp + -4 ] 10230: c2 0f bf fe ldub [ %fp 
+ -2 ], %g1 10234: c6 17 bf fc lduh [ %fp + -4 ], %g3 10238: 89 28 e0 10 sll %g3, 0x10, %g4 1023c: 10 80 00 03 b 10248 <main+0x68> 10240: 89 39 20 10 sra %g4, 0x10, %g4 10244: 84 00 a0 01 inc %g2 10248: 80 a0 80 04 cmp %g2, %g4 1024c: 26 bf ff fe bl,a 10244 <main+0x64> 10250: 82 00 40 01 add %g1, %g1, %g1 10254: 83 28 60 18 sll %g1, 0x18, %g1 10258: 83 38 60 18 sra %g1, 0x18, %g1 1025c: 86 00 40 03 add %g1, %g3, %g3 10260: 84 10 20 00 clr %g2 10264: c6 37 bf fc sth %g3, [ %fp + -4 ] 10268: c2 0f bf ff ldub [ %fp + -1 ], %g1 1026c: c6 0f bf fe ldub [ %fp + -2 ], %g3 10270: 87 28 e0 18 sll %g3, 0x18, %g3 10274: 87 38 e0 18 sra %g3, 0x18, %g3 10278: 10 80 00 03 b 10284 <main+0xa4> 1027c: 88 10 00 03 mov %g3, %g4 10280: 84 00 a0 01 inc %g2 10284: 80 a0 80 04 cmp %g2, %g4 10288: 26 bf ff fe bl,a 10280 <main+0xa0> 1028c: 82 00 40 01 add %g1, %g1, %g1 10290: 83 28 60 18 sll %g1, 0x18, %g1 10294: 83 38 60 18 sra %g1, 0x18, %g1 10298: 86 00 40 03 add %g1, %g3, %g3 1029c: c6 37 bf fc sth %g3, [ %fp + -4 ] 102a0: d0 0f bf ff ldub [ %fp + -1 ], %o0 102a4: d2 17 bf fc lduh [ %fp + -4 ], %o1 102a8: 91 2a 20 18 sll %o0, 0x18, %o0 102ac: 93 2a 60 10 sll %o1, 0x10, %o1 102b0: 91 3a 20 18 sra %o0, 0x18, %o0 102b4: 7f ff ff be call 101ac <somme2> 102b8: 93 3a 60 10 sra %o1, 0x10, %o1 102bc: d0 37 bf fc sth %o0, [ %fp + -4 ] 102c0: d0 0f bf fe ldub [ %fp + -2 ], %o0 102c4: d2 17 bf fc lduh [ %fp + -4 ], %o1 102c8: 91 2a 20 18 sll %o0, 0x18, %o0 102cc: 93 2a 60 10 sll %o1, 0x10, %o1 102d0: 91 3a 20 18 sra %o0, 0x18, %o0 102d4: 7f ff ff b6 call 101ac <somme2> 102d8: 93 3a 60 10 sra %o1, 0x10, %o1 102dc: d0 37 bf fc sth %o0, [ %fp + -4 ] 102e0: d0 0f bf ff ldub [ %fp + -1 ], %o0 102e4: d2 0f bf fe ldub [ %fp + -2 ], %o1 102e8: 91 2a 20 18 sll %o0, 0x18, %o0 102ec: 93 2a 60 18 sll %o1, 0x18, %o1 102f0: 91 3a 20 18 sra %o0, 0x18, %o0 102f4: 7f ff ff ae call 101ac <somme2> 102f8: 93 3a 60 18 sra %o1, 0x18, %o1 102fc: d0 37 bf fc sth %o0, [ %fp + -4 ] 10300: d0 0f bf ff ldub [ %fp + -1 ], 
%o0 10304: d2 0f bf fe ldub [ %fp + -2 ], %o1 10308: 91 2a 20 18 sll %o0, 0x18, %o0 1030c: 93 2a 60 18 sll %o1, 0x18, %o1 10310: 91 3a 20 18 sra %o0, 0x18, %o0 10314: 7f ff ff a6 call 101ac <somme2> 10318: 93 3a 60 17 sra %o1, 0x17, %o1 1031c: d0 37 bf fc sth %o0, [ %fp + -4 ] 10320: d0 0f bf ff ldub [ %fp + -1 ], %o0 10324: d2 0f bf fe ldub [ %fp + -2 ], %o1 10328: 93 2a 60 18 sll %o1, 0x18, %o1 1032c: 93 3a 60 18 sra %o1, 0x18, %o1 10330: 82 02 40 09 add %o1, %o1, %g1 10334: 91 2a 20 18 sll %o0, 0x18, %o0 10338: 92 00 40 09 add %g1, %o1, %o1 1033c: 91 3a 20 18 sra %o0, 0x18, %o0 10340: 93 2a 60 10 sll %o1, 0x10, %o1 10344: 7f ff ff 9a call 101ac <somme2> 10348: 93 3a 60 10 sra %o1, 0x10, %o1 1034c: b0 10 20 00 clr %i0 10350: d0 37 bf fc sth %o0, [ %fp + -4 ] 10354: d0 0f bf ff ldub [ %fp + -1 ], %o0 10358: d2 0f bf fe ldub [ %fp + -2 ], %o1 1035c: 91 2a 20 18 sll %o0, 0x18, %o0 10360: 93 2a 60 18 sll %o1, 0x18, %o1 10364: 91 3a 20 18 sra %o0, 0x18, %o0 10368: 7f ff ff 91 call 101ac <somme2> 1036c: 93 3a 60 16 sra %o1, 0x16, %o1 10370: d0 37 bf fc sth %o0, [ %fp + -4 ] 10374: 81 c7 e0 08 ret 10378: 81 e8 00 00 restore where it can be observed that the function "somme" has been inlined into "main". However, the resulting code is larger, not smaller: - size WITHOUT the options "-combine -fwhole-program": (0x10360-0x101ac)/4+1=110 instructions (NO INLINING WAS DONE) - size WITH the options "-combine -fwhole-program": (0x10378-0x101ac)/4+1=116 instructions (INLINING WAS DONE) So, given that the code is bigger after inlining even though -Os (optimize for size) was requested, I don't understand why this inlining is performed. Does anyone have an idea why? 
Thanks in advance, Best regards, Jorge Eric Botcazou wrote: >> In that case your patch modification seems to work better since it seems >> to remove the cast from the callee and does the sign extension on the >> caller: >> >> (gcc4.5.1 with your patch applied) >> >> 0001024c <somme>: >> 1024c: 10 80 00 04 b 1025c <somme+0x10> >> 10250: 82 10 20 00 clr %g1 >> 10254: 82 00 60 01 inc %g1 >> 10258: 91 3a 20 18 sra %o0, 0x18, %o0 >> 1025c: 80 a0 40 09 cmp %g1, %o1 >> 10260: 26 bf ff fd bl,a 10254 <somme+0x8> >> 10264: 91 2a 20 19 sll %o0, 0x19, %o0 >> 10268: 90 02 00 09 add %o0, %o1, %o0 >> 1026c: 91 2a 20 10 sll %o0, 0x10, %o0 >> 10270: 81 c3 e0 08 retl >> 10274: 91 3a 20 10 sra %o0, 0x10, %o0 >> >> 000102ac <main>: >> 102ac: 9d e3 bf 98 save %sp, -104, %sp >> 102b0: 82 10 20 01 mov 1, %g1 >> 102b4: c2 37 bf fc sth %g1, [ %fp + -4 ] >> 102b8: c2 2f bf ff stb %g1, [ %fp + -1 ] >> 102bc: c2 2f bf fe stb %g1, [ %fp + -2 ] >> 102c0: d0 0f bf ff ldub [ %fp + -1 ], %o0 >> 102c4: d2 17 bf fc lduh [ %fp + -4 ], %o1 >> 102c8: 91 2a 20 18 sll %o0, 0x18, %o0 >> 102cc: 93 2a 60 10 sll %o1, 0x10, %o1 >> 102d0: 91 3a 20 18 sra %o0, 0x18, %o0 >> 102d4: 7f ff ff de call 1024c <somme> >> 102d8: 93 3a 60 10 sra %o1, 0x10, %o1 >> 102dc: d0 37 bf fc sth %o0, [ %fp + -4 ] >> 102e0: d0 0f bf fe ldub [ %fp + -2 ], %o0 >> 102e4: d2 17 bf fc lduh [ %fp + -4 ], %o1 >> 102e8: 91 2a 20 18 sll %o0, 0x18, %o0 >> 102ec: 93 2a 60 10 sll %o1, 0x10, %o1 >> 102f0: 91 3a 20 18 sra %o0, 0x18, %o0 >> 102f4: 7f ff ff d6 call 1024c <somme> >> ... >> >> isn't that right? >> > > Yes, the code is now correct, but still sub-optimal as the sequence: > > >> 102c4: d2 17 bf fc lduh [ %fp + -4 ], %o1 >> > > >> 102cc: 93 2a 60 10 sll %o1, 0x10, %o1 >> > > >> 102d8: 93 3a 60 10 sra %o1, 0x10, %o1 >> > > could be reduced to just: > > ldsh [ %fp + -4 ], %o1 > > but this requires more work in the back-end. 
This is actually the bulk of the > work to be done, as the extension in the callee that TARGET_PROMOTE_PROTOTYPES > triggers very likely has a marginal effect overall. > > Note that x86, MIPS, HP-PA, m68k, IA-64 and many others have the same setting. > >