Inlining functions vs. inlining member functions as templates

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi.

I'm working on a lib that uses policy templates quite extensively, in the vein 
of the STL's template <class Compare> std::list::sort(Compare comp).

Compare can be a function or a struct with operator(). Although the two are 
functionally equivalent, the compiler inlines the struct's operator() but not 
the function. Switching to the struct implementation reduced the runtime of an 
image filtering function by more than 40%.

Below you can find a tiny example. The commented-out code gives the alternative 
implementation, which is inlined perfectly. See the attachments for the 
assembler output.

Can somebody explain why lessFunc is not inlined?

/usr/i686-pc-linux-gnu/gcc-bin/4.0.1-beta20050507/gcc --version
gcc (GCC) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)
flags: -mtune=pentium4 -O3 -fomit-frame-pointer -S

gcc 3.3.3 (SuSE 9.1), gcc 3.3.5 (Gentoo) and gcc 3.4.3 (Gentoo 3.4.3.20050110-r2) 
perform much worse in both cases. Good point for gcc 4.
icc 8.0 produces roughly the same result as gcc 4.0.1. Also very good.
I think the gcc 4.x of Gentoo is very close to, or the same as, the official 
snapshot.

Thanks for any hints,
Peter


--------------------------------------------------------------------------------------------

// struct less {
//   inline bool operator()(const int a, const int b) {
//     return a<b;
//   }
// };

// Free-function comparison policy: returns true when a is strictly less than b.
inline bool lessFunc(const int a, const int b) {
  return a<b;
}

// Applies the comparison policy `comp` to (a, b) and returns its result.
// Comp is deduced from the call site and `comp` is taken by value, so passing
// a function such as lessFunc makes Comp a pointer-to-function type, while
// passing a functor object makes Comp that object's class type.
template <class Comp>
bool foo(const int a, const int b, Comp comp) {
  return comp(a,b);  // invoke whatever callable was passed in
}

// Test driver: the exit status is the bool result of the comparison, converted to int.
int main(int argc, char** argv) {
  return foo(argc,1,lessFunc);  // function variant: evaluates argc < 1
// Functor variant; needs the commented-out struct less and compares against 3, not 1:
//   return foo(argc,3,less());
}


// struct less {
//   inline bool operator()(const int a, const int b) {
//     return a<b;
//   }
// };

// Free-function comparison policy: returns true when a is strictly less than b.
inline bool lessFunc(const int a, const int b) {
  return a<b;
}

// Applies the comparison policy `comp` to (a, b) and returns its result.
// Comp is deduced from the call site and `comp` is taken by value, so passing
// a function such as lessFunc makes Comp a pointer-to-function type, while
// passing a functor object makes Comp that object's class type.
template <class Comp>
bool foo(const int a, const int b, Comp comp) {
  return comp(a,b);  // invoke whatever callable was passed in
}

// Test driver: the exit status is the bool result of the comparison, converted to int.
int main(int argc, char** argv) {
  return foo(argc,1,lessFunc);  // function variant: evaluates argc < 1
// Functor variant; needs the commented-out struct less and compares against 3, not 1:
//   return foo(argc,3,less());
}
	.file	"testTemplateInline.cpp"
	# Out-of-line copy of lessFunc(int, int) under its mangled name _Z8lessFuncii.
	# Emitted as a weak symbol in its own .gnu.linkonce.t.* section.
	# AT&T operand order (source, destination); arguments are read from the stack.
	# In:  4(%esp) = a, 8(%esp) = b   (0(%esp) holds the return address)
	# Out: %eax = 1 if a < b (signed), otherwise 0
	.section	.gnu.linkonce.t._Z8lessFuncii,"ax",@progbits
	.align 2
	.weak	_Z8lessFuncii
	.type	_Z8lessFuncii, @function
_Z8lessFuncii:
.LFB2:
	movl	8(%esp), %eax		# eax = b
	cmpl	%eax, 4(%esp)		# set flags from a - b
	setl	%al			# al = (a < b), signed comparison
	movzbl	%al, %eax		# zero-extend the bool result into eax
	ret
.LFE2:
	.size	_Z8lessFuncii, .-_Z8lessFuncii
	# main() for the function-pointer variant: foo(argc, 1, lessFunc).
	# No call to foo appears here; the only call is a direct call to the
	# out-of-line lessFunc, so the comparison itself was not inlined.
	# In:  8(%ebp) = argc (first stack argument, addressed via the frame pointer)
	# Out: %eax = 1 if argc < 1, otherwise 0
	.text
	.align 2
.globl main
	.type	main, @function
main:
.LFB4:
	pushl	%ebp			# save caller's frame pointer
.LCFI0:
	movl	%esp, %ebp		# establish this function's frame
.LCFI1:
	subl	$8, %esp		# reserve 8 bytes of stack
.LCFI2:
	andl	$-16, %esp		# round esp down to a 16-byte boundary
	subl	$16, %esp		# reserve 16 bytes; outgoing arguments go at the bottom
	movl	$1, 4(%esp)		# second argument: b = 1
	movl	8(%ebp), %eax		# eax = argc
	movl	%eax, (%esp)		# first argument: a = argc
	call	_Z8lessFuncii		# direct call to lessFunc(argc, 1)
	movzbl	%al, %eax		# widen the bool result to main's int return value
	leave				# restore esp from ebp, then pop ebp
	ret
.LFE4:
	.size	main, .-main
	.ident	"GCC: (GNU) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)"
	.section	.note.GNU-stack,"",@progbits
	.file	"testTemplateInline.cpp"
	# main() for the functor variant. There is no call instruction at all:
	# the comparison has been folded into main. The constant 2 with setle
	# (argc <= 2) matches the commented-out source line foo(argc, 3, less()),
	# i.e. argc < 3.
	# In:  8(%ebp) = argc (first stack argument, addressed via the frame pointer)
	# Out: %eax = 1 if argc <= 2, otherwise 0
	.text
	.align 2
.globl main
	.type	main, @function
main:
.LFB4:
	pushl	%ebp			# save caller's frame pointer
.LCFI0:
	movl	%esp, %ebp		# establish this function's frame
.LCFI1:
	subl	$8, %esp		# reserve 8 bytes of stack
.LCFI2:
	andl	$-16, %esp		# round esp down to a 16-byte boundary
	subl	$16, %esp		# reserve 16 more bytes (nothing below stores into them)
	cmpl	$2, 8(%ebp)		# set flags from argc - 2
	setle	%al			# al = (argc <= 2), signed comparison
	andl	$1, %eax		# keep only bit 0, so eax is exactly 0 or 1
	leave				# restore esp from ebp, then pop ebp
	ret
.LFE4:
	.size	main, .-main
	.ident	"GCC: (GNU) 4.0.1-beta20050507 (Gentoo 4.0.1_beta20050507)"
	.section	.note.GNU-stack,"",@progbits

[Index of Archives]     [Linux C Programming]     [Linux Kernel]     [eCos]     [Fedora Development]     [Fedora Announce]     [Autoconf]     [The DWARVES Debugging Tools]     [Yosemite Campsites]     [Yosemite News]     [Linux GCC]

  Powered by Linux