problems with optimisation

Kicer <kicer86@xxxxxxxxx> · Fri, 28 Dec 2012 11:25:01 +0100

Hi all

Over the last few days I've run into a problem with certain code optimisations:

// Anonymous namespace: the helper types below are local to this translation unit.
namespace 
{

  // Forward declaration: Bit holds a reference to Base before Base is defined.
  struct Base;

  // Refers to one bit of a Base object.
  // Stores a reference to the Base (not a copy) together with a bit index, so
  // the Base must stay alive for as long as the Bit is used.
  struct Bit
  {
	  // Object whose value is tested by operator bool().
	  const Base &m_p;
	  // Index of the bit tested by operator bool().
	  const int m_pos;

	  // p   - Base to refer to (bound by reference)
	  // pos - bit index stored in m_pos
	  constexpr Bit(const Base &p, const int pos): m_p(p), m_pos(pos)
	  {
	  }

	  // Only declared here; the definition follows Base because it needs
	  // Base's conversion to char. As it is defined outside the class body,
	  // it is not implicitly inline.
	  operator bool() const;  
  };

  // Wraps an integer port number; converting the object to char reads a byte
  // from that port.
  struct Base
  {  
	  // Port number passed to the "in" instruction.
	  const int m_port;
	  constexpr Base(int p): m_port(p)
	  {
	  }

	  // Executes the x86 instruction "in %dx, %al" with m_port placed in the
	  // d register (input constraint "d") and returns the byte left in the
	  // a register (output constraint "=a").
	  // NOTE(review): the asm statement is not marked volatile; GCC documents
	  // that a non-volatile asm with outputs may be merged or dropped by the
	  // optimizer - confirm that is acceptable for a port read.
	  operator char () const
	  {
		  char result;

		  asm(
			"in %%dx, %%al\n"
			:"=a"(result)
			:"d"(m_port)
		  );

		  // Disabled alternative: read through a pointer formed from m_port+32
		  // instead of using the asm statement.
		  //result = *(reinterpret_cast<char *>(m_port+32));

		  return result;
	  }

	  // Returns a Bit for bit index p of this object. The Bit keeps a
	  // reference to *this; nothing is read until it is converted to bool.
	  Bit operator[] (int p) const
	  {
		  Bit r(*this, p);
		  return r;
	  }

  };

  // Converts the referenced Base to char (which performs the port read) and
  // tests bit m_pos of the result. v is promoted to int before the mask is
  // applied.
  // NOTE(review): the test is "> 0", not "!= 0"; presumably m_pos stays in the
  // range 0..7 so the mask is always positive - confirm.
  Bit::operator bool() const
  {
	  const char v = m_p;
	  const bool r = (v & (1 << m_pos)) > 0;

	  return r;
  }

  // A Base for port o that additionally owns a second Base, m_in, for port o - 1.
  struct Anc: public Base
  {
	  const Base m_in;
	  // o - port number of the Base subobject; m_in is constructed with o - 1.
	  constexpr Anc(int o): Base(o), m_in(o - 1)
	  {
	  }

	  // Returns a reference to the member Base constructed with o - 1.
	  const Base& getIn() const
	  {
		  return m_in;
	  }

  };

}

// Builds two Anc objects, p for port v and p2 for port v+2, and combines
// several port reads into one char.
//   - p.getIn() and p2.getIn() are the Base members for ports v-1 and v+1;
//     each is converted to char and the sum is stored (narrowed) in r.
//   - p2[4] tests bit 4 of the value read from port v+2 and adds 1 if set.
// The commented-out line would additionally test bit 0 of port v (via p[0]).
template<int v>
char foo()
{
	Anc p(v), p2(v+2);
	char r = p.getIn() + p2.getIn();

	//r += p[0]? 1: 0;                   //commented out at first step
	r += p2[4]? 1 : 0;

	return r;
}

// Instantiates foo for v = 4 and v = 6 and returns foo<4>() minus foo<6>(),
// narrowed to char.
char bar()
{
  char r = foo<4>();

  r-= foo<6>();

  return r;
}

There are 3 structs which look more complex than the code they generate.
foo() and bar() just use those structs.
For the code above, the output is short and clear, as expected:

   0:	ba 03 00 00 00       	mov    $0x3,%edx
   5:	be 01 00 00 00       	mov    $0x1,%esi
   a:	ec                   	in     (%dx),%al
   b:	b2 05                	mov    $0x5,%dl
   d:	41 88 c0             	mov    %al,%r8b
  10:	ec                   	in     (%dx),%al
  11:	b2 06                	mov    $0x6,%dl
  13:	40 88 c7             	mov    %al,%dil
  16:	ec                   	in     (%dx),%al
  17:	b2 07                	mov    $0x7,%dl
  19:	88 c1                	mov    %al,%cl
  1b:	ec                   	in     (%dx),%al
  1c:	b2 08                	mov    $0x8,%dl
  1e:	41 88 c1             	mov    %al,%r9b
  21:	c0 e9 04             	shr    $0x4,%cl
  24:	ec                   	in     (%dx),%al
  25:	c0 e8 04             	shr    $0x4,%al
  28:	41 01 f9             	add    %edi,%r9d
  2b:	83 e1 01             	and    $0x1,%ecx
  2e:	21 c6                	and    %eax,%esi
  30:	42 8d 04 07          	lea    (%rdi,%r8,1),%eax
  34:	44 01 ce             	add    %r9d,%esi
  37:	01 c8                	add    %ecx,%eax
  39:	40 0f be f6          	movsbl %sil,%esi
  3d:	0f be c0             	movsbl %al,%eax
  40:	29 f0                	sub    %esi,%eax
  42:	c3                   	retq   

But when I uncomment "//r += p[0]? 1: 0; " in foo(), the code becomes
unexpectedly large and unclear:

0000000000000000 <_ZNK12_GLOBAL__N_13BitcvbEv>:
   0:	48 8b 07             	mov    (%rdi),%rax
   3:	8b 4f 08             	mov    0x8(%rdi),%ecx
   6:	be 01 00 00 00       	mov    $0x1,%esi
   b:	8b 10                	mov    (%rax),%edx
   d:	d3 e6                	shl    %cl,%esi
   f:	ec                   	in     (%dx),%al
  10:	0f be c0             	movsbl %al,%eax
  13:	85 f0                	test   %esi,%eax
  15:	0f 9f c0             	setg   %al
  18:	c3                   	retq   

0000000000000019 <_Z3barv>:
  19:	53                   	push   %rbx
  1a:	e8 00 00 00 00       	callq  1f <_Z3barv+0x6>
  1f:	88 c3                	mov    %al,%bl
  21:	e8 00 00 00 00       	callq  26 <_Z3barv+0xd>
  26:	0f be d3             	movsbl %bl,%edx
  29:	0f be c0             	movsbl %al,%eax
  2c:	29 c2                	sub    %eax,%edx
  2e:	88 d0                	mov    %dl,%al
  30:	5b                   	pop    %rbx
  31:	c3                   	retq   

Disassembly of section .text._Z3fooILi4EEcv:

0000000000000000 <_Z3fooILi4EEcv>:
   0:	41 54                	push   %r12
   2:	ba 03 00 00 00       	mov    $0x3,%edx
   7:	ec                   	in     (%dx),%al
   8:	55                   	push   %rbp
   9:	b2 05                	mov    $0x5,%dl
   b:	40 88 c5             	mov    %al,%bpl
   e:	ec                   	in     (%dx),%al
   f:	53                   	push   %rbx
  10:	41 88 c4             	mov    %al,%r12b
  13:	41 8d 1c 2c          	lea    (%r12,%rbp,1),%ebx
  17:	48 83 ec 20          	sub    $0x20,%rsp
  1b:	48 8d 04 24          	lea    (%rsp),%rax
  1f:	48 8d 7c 24 10       	lea    0x10(%rsp),%rdi
  24:	c7 04 24 04 00 00 00 	movl   $0x4,(%rsp)
  2b:	c7 44 24 04 03 00 00 	movl   $0x3,0x4(%rsp)
  32:	00 
  33:	c7 44 24 08 06 00 00 	movl   $0x6,0x8(%rsp)
  3a:	00 
  3b:	0f be db             	movsbl %bl,%ebx
  3e:	c7 44 24 0c 05 00 00 	movl   $0x5,0xc(%rsp)
  45:	00 
  46:	48 89 44 24 10       	mov    %rax,0x10(%rsp)
  4b:	c7 44 24 18 00 00 00 	movl   $0x0,0x18(%rsp)
  52:	00 
  53:	e8 00 00 00 00       	callq  58 <_Z3fooILi4EEcv+0x58>
  58:	0f b6 c0             	movzbl %al,%eax
  5b:	48 8d 7c 24 10       	lea    0x10(%rsp),%rdi
  60:	c7 44 24 18 04 00 00 	movl   $0x4,0x18(%rsp)
  67:	00 
  68:	01 c3                	add    %eax,%ebx
  6a:	48 8d 44 24 08       	lea    0x8(%rsp),%rax
  6f:	0f be db             	movsbl %bl,%ebx
  72:	48 89 44 24 10       	mov    %rax,0x10(%rsp)
  77:	e8 00 00 00 00       	callq  7c <_Z3fooILi4EEcv+0x7c>
  7c:	48 83 c4 20          	add    $0x20,%rsp
  80:	0f b6 c0             	movzbl %al,%eax
  83:	01 d8                	add    %ebx,%eax
  85:	5b                   	pop    %rbx
  86:	5d                   	pop    %rbp
  87:	41 5c                	pop    %r12
  89:	c3                   	retq   

Disassembly of section .text._Z3fooILi6EEcv:

0000000000000000 <_Z3fooILi6EEcv>:
   0:	41 54                	push   %r12
   2:	ba 05 00 00 00       	mov    $0x5,%edx
   7:	ec                   	in     (%dx),%al
   8:	55                   	push   %rbp
   9:	b2 07                	mov    $0x7,%dl
   b:	40 88 c5             	mov    %al,%bpl
   e:	ec                   	in     (%dx),%al
   f:	53                   	push   %rbx
  10:	41 88 c4             	mov    %al,%r12b
  13:	41 8d 1c 2c          	lea    (%r12,%rbp,1),%ebx
  17:	48 83 ec 20          	sub    $0x20,%rsp
  1b:	48 8d 04 24          	lea    (%rsp),%rax
  1f:	48 8d 7c 24 10       	lea    0x10(%rsp),%rdi
  24:	c7 04 24 06 00 00 00 	movl   $0x6,(%rsp)
  2b:	c7 44 24 04 05 00 00 	movl   $0x5,0x4(%rsp)
  32:	00 
  33:	c7 44 24 08 08 00 00 	movl   $0x8,0x8(%rsp)
  3a:	00 
  3b:	0f be db             	movsbl %bl,%ebx
  3e:	c7 44 24 0c 07 00 00 	movl   $0x7,0xc(%rsp)
  45:	00 
  46:	48 89 44 24 10       	mov    %rax,0x10(%rsp)
  4b:	c7 44 24 18 00 00 00 	movl   $0x0,0x18(%rsp)
  52:	00 
  53:	e8 00 00 00 00       	callq  58 <_Z3fooILi6EEcv+0x58>
  58:	0f b6 c0             	movzbl %al,%eax
  5b:	48 8d 7c 24 10       	lea    0x10(%rsp),%rdi
  60:	c7 44 24 18 04 00 00 	movl   $0x4,0x18(%rsp)
  67:	00 
  68:	01 c3                	add    %eax,%ebx
  6a:	48 8d 44 24 08       	lea    0x8(%rsp),%rax
  6f:	0f be db             	movsbl %bl,%ebx
  72:	48 89 44 24 10       	mov    %rax,0x10(%rsp)
  77:	e8 00 00 00 00       	callq  7c <_Z3fooILi6EEcv+0x7c>
  7c:	48 83 c4 20          	add    $0x20,%rsp
  80:	0f b6 c0             	movzbl %al,%eax
  83:	01 d8                	add    %ebx,%eax
  85:	5b                   	pop    %rbx
  86:	5d                   	pop    %rbp
  87:	41 5c                	pop    %r12
  89:	c3                   	retq   

compilation flags:
g++ -Os test.cpp -c -o test.o -std=c++11

This may seem to be a less important problem on x86, but I'm affected
by it on the AVR architecture, where memory is very limited. Can I somehow
figure out why gcc gives up on generating clean code in the second example?

regards

-- 
Michał Walenciak
gmail.com kicer86
http://kicer.sileman.net.pl
gg: 3729519