Hi.

I'm working on a library that uses policy templates quite extensively, in the vein of the STL's template <class Compare> std::list::sort(Compare comp). Compare can be a function or a struct with operator(). While the functionality is equivalent, the compiler inlines the struct but not the function. Changing to the struct implementation decreased the runtime of an image filtering function by more than 40%.

Below you can find a tiny example. The commented-out code gives the alternative implementation, which is perfectly inlined. See the attachments for the assembler code.

Can somebody explain why lessFunc is not inlined?

/usr/i686-pc-linux-gnu/gcc-bin/4.0.1-beta20050507/gcc --version
gcc (GCC) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)
flags: -mtune=pentium4 -O3 -fomit-frame-pointer -S

gcc 3.3.3 (SuSE 9.1), gcc 3.3.5 (Gentoo) and gcc 3.4.3 (Gentoo 3.4.3.20050110-r2) perform much worse in both cases. Good point for gcc 4. icc 8.0 produces roughly the same result as gcc 4.0.1. Also very good.

I think the gcc 4.x of Gentoo is very close to, or the same as, the official snapshot.

Thanks for any hints,
Peter

--------------------------------------------------------------------------------------------

// struct less {
//   inline bool operator()(const int a, const int b) {
//     return a<b;
//   }
// };

inline bool lessFunc(const int a, const int b) {
  return a<b;
}

template <class Comp>
bool foo(const int a, const int b, Comp comp) {
  return comp(a,b);
}

int main(int argc, char** argv) {
  return foo(argc,1,lessFunc);
  // return foo(argc,3,less());
}
// Minimal test case: a comparison policy is handed to a function template
// either as a plain function (active code) or as a function object
// (commented-out alternative). The two are functionally equivalent; the
// test case exists to compare the code the compiler generates for each.
//
// Alternative policy written as a function object. To use it, enable this
// struct and the commented-out return statement in main().
//
// struct less {
//   inline bool operator()(const int a, const int b) {
//     return a<b;
//   }
// };

/// Comparison policy written as a free function.
/// @return true when a is strictly less than b.
inline bool lessFunc(const int a, const int b) {
  return a<b;
}

/// Applies the comparison policy comp to the pair (a, b).
///
/// Comp may be any type callable with two ints. When a function name such as
/// lessFunc is passed, Comp is deduced as a pointer-to-function type, so the
/// callee is identified by a runtime pointer value rather than by the type.
///
/// @param a    left-hand operand forwarded to comp
/// @param b    right-hand operand forwarded to comp
/// @param comp comparison policy, taken by value
/// @return whatever comp(a, b) yields, converted to bool
template <class Comp>
bool foo(const int a, const int b, Comp comp) {
  return comp(a,b);
}

/// Uses argc as a value unknown at compile time so the comparison cannot be
/// constant-folded away; the boolean result becomes the exit status.
int main(int argc, char** argv) {
  return foo(argc,1,lessFunc);
  // return foo(argc,3,less());
}
/* Compiler output for the function-pointer variant, foo(argc,1,lessFunc).
   lessFunc(int,int) (mangled _Z8lessFuncii) is emitted out of line as a weak
   symbol in its own link-once section, and main still calls it: no symbol
   for foo appears, but the comparison itself was not folded into main. */
    .file "testTemplateInline.cpp"
    .section .gnu.linkonce.t._Z8lessFuncii,"ax",@progbits
    .align 2
    .weak _Z8lessFuncii
    .type _Z8lessFuncii, @function
_Z8lessFuncii:
.LFB2:
    movl 8(%esp), %eax          /* eax = second stack argument (b) */
    cmpl %eax, 4(%esp)          /* compare first stack argument (a) with b */
    setl %al                    /* al = 1 when a < b (signed), else 0 */
    movzbl %al, %eax            /* zero-extend the flag into the return register */
    ret
.LFE2:
    .size _Z8lessFuncii, .-_Z8lessFuncii
    .text
    .align 2
.globl main
    .type main, @function
main:
.LFB4:
    pushl %ebp
.LCFI0:
    movl %esp, %ebp
.LCFI1:
    subl $8, %esp
.LCFI2:
    andl $-16, %esp             /* round the stack pointer down to a multiple of 16 */
    subl $16, %esp
    movl $1, 4(%esp)            /* outgoing second argument: the constant 1 */
    movl 8(%ebp), %eax          /* load main's first argument (argc in the C++ source) */
    movl %eax, (%esp)           /* outgoing first argument */
    call _Z8lessFuncii          /* the comparison is performed via a real call */
    movzbl %al, %eax            /* widen the bool result to main's int return value */
    leave
    ret
.LFE4:
    .size main, .-main
    .ident "GCC: (GNU) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)"
    .section .note.GNU-stack,"",@progbits
/* Compiler output for the function-object variant, foo(argc,3,less()).
   Only main is emitted and it contains no call instruction: the comparison
   has been folded directly into main. */
    .file "testTemplateInline.cpp"
    .text
    .align 2
.globl main
    .type main, @function
main:
.LFB4:
    pushl %ebp
.LCFI0:
    movl %esp, %ebp
.LCFI1:
    subl $8, %esp
.LCFI2:
    andl $-16, %esp             /* round the stack pointer down to a multiple of 16 */
    subl $16, %esp
    cmpl $2, 8(%ebp)            /* compare main's first argument (argc in the C++ source) with 2 */
    setle %al                   /* al = 1 when argc <= 2, i.e. argc < 3 */
    andl $1, %eax               /* keep only the low bit as the int return value */
    leave
    ret
.LFE4:
    .size main, .-main
    .ident "GCC: (GNU) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)"
    .section .note.GNU-stack,"",@progbits