Re: Optimized program four times slower.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



"Sebastien M." wrote:

> The loop which calls "empty_ptr()" is four times slower with -O2.
> Is there a logical reason for this ?

The only significant difference between the two seems to be that with
-O2 gcc chooses esi as the register to hold the function pointer.

-O0:

     time = clock();
  40136c:       e8 ef 02 00 00          call   401660 <_clock>
  401371:       89 45 f8                mov    %eax,0xfffffff8(%ebp)
     for(int i=0;i<numtests;i++) empty_ptr(x);
  401374:       c7 45 e4 00 00 00 00    movl   $0x0,0xffffffe4(%ebp)
  40137b:       8b 45 e4                mov    0xffffffe4(%ebp),%eax
  40137e:       3b 45 f4                cmp    0xfffffff4(%ebp),%eax
  401381:       7d 14                   jge    401397 <_main+0xaf>
  401383:       dd 45 e8                fldl   0xffffffe8(%ebp)
  401386:       dd 1c 24                fstpl  (%esp)
  401389:       8b 45 fc                mov    0xfffffffc(%ebp),%eax
  40138c:       ff d0                   call   *%eax
  40138e:       dd d8                   fstp   %st(0)
  401390:       8d 45 e4                lea    0xffffffe4(%ebp),%eax
  401393:       ff 00                   incl   (%eax)
  401395:       eb e4                   jmp    40137b <_main+0x93>
     printf("Time : empty_ptr = %ld\n", clock() - time);
  401397:       e8 c4 02 00 00          call   401660 <_clock>
  40139c:       2b 45 f8                sub    0xfffffff8(%ebp),%eax
  40139f:       89 44 24 04             mov    %eax,0x4(%esp)
  4013a3:       c7 04 24 14 30 40 00    movl   $0x403014,(%esp)
  4013aa:       e8 a1 02 00 00          call   401650 <_printf>


-O2:

     time = clock();
  40133f:       e8 ec 02 00 00          call   401630 <_clock>
  401344:       89 c7                   mov    %eax,%edi
  401346:       8d 76 00                lea    0x0(%esi),%esi
  401349:       8d bc 27 00 00 00 00    lea    0x0(%edi),%edi
     for(int i=0;i<numtests;i++) empty_ptr(x);
  401350:       c7 04 24 00 00 00 00    movl   $0x0,(%esp)
  401357:       b8 00 00 08 40          mov    $0x40080000,%eax
  40135c:       89 44 24 04             mov    %eax,0x4(%esp)
  401360:       ff d6                   call   *%esi
  401362:       dd d8                   fstp   %st(0)
  401364:       4b                      dec    %ebx
  401365:       79 e9                   jns    401350 <_main+0x60>
     printf("Time : empty_ptr = %ld\n", clock() - time);
  401367:       e8 c4 02 00 00          call   401630 <_clock>
  40136c:       c7 04 24 14 30 40 00    movl   $0x403014,(%esp)
  401373:       29 f8                   sub    %edi,%eax
  401375:       89 44 24 04             mov    %eax,0x4(%esp)
  401379:       e8 a2 02 00 00          call   401620 <_printf>

So it could just be a bad decision by the register allocator.  I don't
know what the performance difference between call *%eax and call *%esi
is.

Also note that gcc 4.3 with -O2 is smart enough to recognise that
neither of these loops does anything and to remove them both entirely:

     time = clock();
  40108b:       e8 c0 00 00 00          call   401150 <_clock>
  401090:       89 c3                   mov    %eax,%ebx
     for(int i=0;i<numtests;i++) empty(x);
     printf("Time : empty = %ld\n", clock() - time);
  401092:       e8 b9 00 00 00          call   401150 <_clock>
  401097:       c7 04 24 00 20 40 00    movl   $0x402000,(%esp)
  40109e:       29 d8                   sub    %ebx,%eax
  4010a0:       89 44 24 04             mov    %eax,0x4(%esp)
  4010a4:       e8 b7 00 00 00          call   401160 <_printf>

     time = clock();
  4010a9:       e8 a2 00 00 00          call   401150 <_clock>
  4010ae:       89 c3                   mov    %eax,%ebx
     for(int i=0;i<numtests;i++) empty_ptr(x);
     printf("Time : empty_ptr = %ld\n", clock() - time);
  4010b0:       e8 9b 00 00 00          call   401150 <_clock>
  4010b5:       c7 04 24 14 20 40 00    movl   $0x402014,(%esp)
  4010bc:       29 d8                   sub    %ebx,%eax
  4010be:       89 44 24 04             mov    %eax,0x4(%esp)
  4010c2:       e8 99 00 00 00          call   401160 <_printf>

Time : empty = 0
Time : empty_ptr = 0

Brian

[Index of Archives]     [Linux C Programming]     [Linux Kernel]     [eCos]     [Fedora Development]     [Fedora Announce]     [Autoconf]     [The DWARVES Debugging Tools]     [Yosemite Campsites]     [Yosemite News]     [Linux GCC]

  Powered by Linux