"Sebastien M." wrote: > The loop which call "empty_ptr()" is four times slower with 02. > Is there a logical reason for this ? The only significant difference between the two seems to be that with -O2 gcc chooses esi as the register to hold the function pointer. -O0: time = clock(); 40136c: e8 ef 02 00 00 call 401660 <_clock> 401371: 89 45 f8 mov %eax,0xfffffff8(%ebp) for(int i=0;i<numtests;i++) empty_ptr(x); 401374: c7 45 e4 00 00 00 00 movl $0x0,0xffffffe4(%ebp) 40137b: 8b 45 e4 mov 0xffffffe4(%ebp),%eax 40137e: 3b 45 f4 cmp 0xfffffff4(%ebp),%eax 401381: 7d 14 jge 401397 <_main+0xaf> 401383: dd 45 e8 fldl 0xffffffe8(%ebp) 401386: dd 1c 24 fstpl (%esp) 401389: 8b 45 fc mov 0xfffffffc(%ebp),%eax 40138c: ff d0 call *%eax 40138e: dd d8 fstp %st(0) 401390: 8d 45 e4 lea 0xffffffe4(%ebp),%eax 401393: ff 00 incl (%eax) 401395: eb e4 jmp 40137b <_main+0x93> printf("Time : empty_ptr = %ld\n", clock() - time); 401397: e8 c4 02 00 00 call 401660 <_clock> 40139c: 2b 45 f8 sub 0xfffffff8(%ebp),%eax 40139f: 89 44 24 04 mov %eax,0x4(%esp) 4013a3: c7 04 24 14 30 40 00 movl $0x403014,(%esp) 4013aa: e8 a1 02 00 00 call 401650 <_printf> -O2: time = clock(); 40133f: e8 ec 02 00 00 call 401630 <_clock> 401344: 89 c7 mov %eax,%edi 401346: 8d 76 00 lea 0x0(%esi),%esi 401349: 8d bc 27 00 00 00 00 lea 0x0(%edi),%edi for(int i=0;i<numtests;i++) empty_ptr(x); 401350: c7 04 24 00 00 00 00 movl $0x0,(%esp) 401357: b8 00 00 08 40 mov $0x40080000,%eax 40135c: 89 44 24 04 mov %eax,0x4(%esp) 401360: ff d6 call *%esi 401362: dd d8 fstp %st(0) 401364: 4b dec %ebx 401365: 79 e9 jns 401350 <_main+0x60> printf("Time : empty_ptr = %ld\n", clock() - time); 401367: e8 c4 02 00 00 call 401630 <_clock> 40136c: c7 04 24 14 30 40 00 movl $0x403014,(%esp) 401373: 29 f8 sub %edi,%eax 401375: 89 44 24 04 mov %eax,0x4(%esp) 401379: e8 a2 02 00 00 call 401620 <_printf> So it could just be a bad decision by the register allocator. I don't know what the performance difference between call *%eax and call *%esi is. 
Also note that gcc 4.3 with -O2 is smart enough to recognise that neither of these loops does anything and remove them both entirely: time = clock(); 40108b: e8 c0 00 00 00 call 401150 <_clock> 401090: 89 c3 mov %eax,%ebx for(int i=0;i<numtests;i++) empty(x); printf("Time : empty = %ld\n", clock() - time); 401092: e8 b9 00 00 00 call 401150 <_clock> 401097: c7 04 24 00 20 40 00 movl $0x402000,(%esp) 40109e: 29 d8 sub %ebx,%eax 4010a0: 89 44 24 04 mov %eax,0x4(%esp) 4010a4: e8 b7 00 00 00 call 401160 <_printf> time = clock(); 4010a9: e8 a2 00 00 00 call 401150 <_clock> 4010ae: 89 c3 mov %eax,%ebx for(int i=0;i<numtests;i++) empty_ptr(x); printf("Time : empty_ptr = %ld\n", clock() - time); 4010b0: e8 9b 00 00 00 call 401150 <_clock> 4010b5: c7 04 24 14 20 40 00 movl $0x402014,(%esp) 4010bc: 29 d8 sub %ebx,%eax 4010be: 89 44 24 04 mov %eax,0x4(%esp) 4010c2: e8 99 00 00 00 call 401160 <_printf> Time : empty = 0 Time : empty_ptr = 0 Brian