Some strict aliasing related fun

Raymond Sheh <rsheh@xxxxxxxxxxxxxxx> · Wed, 28 Nov 2007 13:09:13 +1100

Hi all!

I'm having some fun with some network code I'm writing that's trying to 
send 4 byte floats through ntohl/htonl. I *think* I've traced my 
problems to the way my code interacts with the optimiser in GCC 4.x (GCC 
3.3.6 and 2.9.5 appear to be fine).

Here's a little program that demonstrates this; an excessively verbose 
version that shows a few other things I tried appears at the end of this 
email. It works when compiled with -O1, but with -O2 it breaks (outputting 
zeros) and the whole thing goes random with -O3. Adding 
-fno-strict-aliasing makes it work again, but compiling with only 
-fstrict-aliasing doesn't break it, so it looks like a combination of things.
========================
#include <stdio.h>

// Should output "retval = 11 00 11 00" if it's working.
// Broken output is either 00 00 00 00 or some random sequence.

// Standin for ntohl/htonl
// Placeholder with the same shape as ntohl/htonl (unsigned long in,
// unsigned long out). The argument is ignored: every call yields the fixed
// bit pattern 0x00110011, so the expected program output is known in advance.
unsigned long
byte_manipulation_function_that_happens_to_take_ulongs(unsigned long in)
{
   const unsigned long fixed_pattern = 0x00110011UL;

   (void)in; // parameter is present only to mirror the ntohl/htonl signature
   return fixed_pattern;
}

// Round-trips a float through the unsigned-long byte manipulation function.
//
// Reads the storage of `in` as an unsigned long, passes that value to
// byte_manipulation_function_that_happens_to_take_ulongs(), then reads the
// storage of the unsigned long result as a float and returns it.
//
// NOTE(review): both reads dereference a pointer that was cast to an
// incompatible type (float object read as unsigned long, unsigned long
// object read as float). That violates the strict aliasing rule and is
// undefined behavior, so the compiler may reorder or drop the accesses.
// The code is intentionally left unchanged because it is the reproducer.
// NOTE(review): assumes sizeof(unsigned long) == sizeof(float) - confirm for
// the target platform.
float floatblah(float in)
{
   // Argument: `in` read through an unsigned long lvalue.
   unsigned long retval = 
byte_manipulation_function_that_happens_to_take_ulongs(*((unsigned 
long*)(&in)));
   // Result: `retval` read through a float lvalue.
   return *((float*)(&retval));
}

// Test driver: passes the float 10 through floatblah() and prints the first
// four bytes of the returned float in hex, lowest address first.
//
// argc and argv are unused. Always returns 0.
int main(int argc, char * argv[])
{
   float myF = 10;
   float retval = floatblah(myF);
   // Inspect the bytes as unsigned char: plain char may be signed, in which
   // case a byte >= 0x80 would be sign-extended and print as ffffffXX.
   const unsigned char * retvalP = (const unsigned char*)(&retval);

   (void)argc;
   (void)argv;

   // %02x keeps leading zeros so the output has the documented
   // "11 00 11 00" shape instead of "11 0 11 0".
   fprintf(stdout, "retval = %02x %02x %02x %02x\n",
           (unsigned)retvalP[0], (unsigned)retvalP[1],
           (unsigned)retvalP[2], (unsigned)retvalP[3]);
   return 0;
}
========================

I've found mention of issues with strict aliasing in the list archives, 
often associated with pointer arithmetic and of course the man page 
itself has an example with unions but I've not been able to find any 
other mention of problems where only casts are involved ...

I'd appreciate any advice you people could offer on this matter!

In particular, the casting around in the "floatblah" function looks 
somewhat messy (although, as I understand it, it's still valid C) but 
short of memcpy'ing stuff around, is there really any other way of doing 
it efficiently that the compiler won't boil down to the same thing 
anyway? (I've got an alternative that splits this all up at the end 
which also suffers from this problem.)

Cheers!

- Raymond

Excessively verbose version:
=========================
#include <stdio.h>
#include <string.h>

// Simple test to demonstrate an issue I'm having with compiler optimizations
// under GCC 4.1 and above (GCC 3.3.6 and 2.9.5 don't have this problem even
// at -O3).
// I compiled this with "gcc -O1 test.c -o test; ./test" and compared the
// output to "gcc -O2 test.c -o test; ./test". The last line SHOULD read
// "11 00 11 00" (the value from the function below) but the version with -O2
// will read "00 00 00 00".
// It gets even more random with -O3. See comments below for more details.

// A simple function in lieu of the system htonl or ntohl functions (to
// demonstrate that this problem has nothing to do with that system function).
// To see a more realistic real-world problem (ie. the problem I initially
// had!) add "#include <arpa/inet.h>" and replace occurrences of
// "byte_manipulation_function_that_happens_to_take_ulongs" with "ntohl" or
// "htonl".
// The output result (with a float of 10 as the input) should be 41 20 00 00.
// Same signature shape as ntohl/htonl, but the input is discarded and a
// constant is handed back, so the test does not depend on the system
// byte-order functions.
unsigned long
byte_manipulation_function_that_happens_to_take_ulongs(unsigned long in)
{
   const unsigned long canned_result = 0x00110011UL;

   (void)in; // intentionally unused
   return canned_result;
}

// This works with -O1 and doesn't work with -O2. It works with -O2
// -fno-strict-aliasing BUT if I just compile with -fstrict-aliasing it also
// works so it looks like it's a combination of things working together that
// break this (actually, under 4.1.3 if I throw in ALL the flags that the man
// page says are in -O1 and -O2 it also works so there's something ELSE in
// play here as well I think ...)
// Step-by-step float -> unsigned long -> float round trip, with every
// pointer conversion stored in its own local variable.
//
// Input side:  &in is converted float* -> void* -> unsigned long* and an
//              unsigned long is read through it.
// Middle:      that value is passed to
//              byte_manipulation_function_that_happens_to_take_ulongs().
// Output side: &retval is converted unsigned long* -> void* -> float* and
//              the float read through it is returned.
//
// NOTE(review): both reads access an object through a pointer to an
// incompatible type, which violates the strict aliasing rule and is
// undefined behavior; routing the pointer through void* does not change
// that. Left byte-for-byte unchanged because this function is the reproducer.
// NOTE(review): assumes sizeof(unsigned long) == sizeof(float) - confirm for
// the target platform.
float floatblah_broken(float in)
{
   float * inP;
   void * inV;
   unsigned long * inuP;
   unsigned long inu;
   unsigned long retval;
   unsigned long * retvalP;
   void * retvalV;
   float * retvalfP;
   float retvalf;

   // Build an unsigned long* that points at the float parameter.
   inP = &in;
   inV = (void*)inP;
   inuP = (unsigned long*)inV;
   // Aliasing read #1: float object read as unsigned long.
   inu = *inuP;
   retval = byte_manipulation_function_that_happens_to_take_ulongs(inu);
   // Build a float* that points at the unsigned long result.
   retvalP = &retval;
   retvalV = (void*)retvalP;
   retvalfP = (float*)retvalV;
   // Aliasing read #2: unsigned long object read as float.
   retvalf = *retvalfP;
   return retvalf;
}

// The above, all smashed together and without void casts.
// This also works with -O1 and not -O2 but can be made to break with
// -fschedule-insns -fstrict-aliasing and can be made to break in even more
// interesting ways when these two options are used in combination with
// -fexpensive-optimizations !
// Compact form of the float -> unsigned long -> float round trip: the
// pointer casts are written inline and go directly between float* and
// unsigned long* with no void* step.
//
// NOTE(review): reading `in` through an unsigned long lvalue and `retval`
// through a float lvalue violates the strict aliasing rule (undefined
// behavior). Left unchanged because this function is a reproducer.
// NOTE(review): assumes sizeof(unsigned long) == sizeof(float) - confirm for
// the target platform.
float floatblah_alsobroken(float in)
{
   // Argument: `in` read through an unsigned long lvalue.
   unsigned long retval = 
byte_manipulation_function_that_happens_to_take_ulongs(*((unsigned 
long*)(&in)));
   // Result: `retval` read through a float lvalue.
   return *((float*)(&retval));
}

// These two work regardless - I'm guessing the printf forces the optimizer
// to sit up and take notice! Memcpy'ing the memory around instead of using
// assignments also works.
// Same step-by-step round trip as the broken variant, with one addition:
// the unsigned long result is printed to stdout before it is reinterpreted
// as a float.
//
// Side effect: writes one line "retval=<value>" to stdout on every call.
//
// NOTE(review): the two pointer-cast reads still violate the strict aliasing
// rule (undefined behavior); the extra fprintf only changes what the
// optimizer happens to do. Left unchanged because it is part of the
// reproducer.
// NOTE(review): "%ld" is used with an unsigned long argument; "%lu" would
// match the argument type.
float floatblah_works(float in)
{
   float * inP;
   void * inV;
   unsigned long * inuP;
   unsigned long inu;
   unsigned long retval;
   unsigned long * retvalP;
   void * retvalV;
   float * retvalfP;
   float retvalf;

   // Build an unsigned long* that points at the float parameter.
   inP = &in;
   inV = (void*)inP;
   inuP = (unsigned long*)inV;
   // Aliasing read #1: float object read as unsigned long.
   inu = *inuP;
   retval = byte_manipulation_function_that_happens_to_take_ulongs(inu);

   // The only difference from the broken variant: retval is used here.
   fprintf(stdout, "retval=%ld\n", retval);

   // Build a float* that points at the unsigned long result.
   retvalP = &retval;
   retvalV = (void*)retvalP;
   retvalfP = (float*)retvalV;
   // Aliasing read #2: unsigned long object read as float.
   retvalf = *retvalfP;
   return retvalf;
}

// Compact round trip with inline pointer casts, plus a print of the
// intermediate unsigned long before it is reinterpreted as a float.
//
// Side effect: writes one line "retval=<value>" to stdout on every call.
//
// NOTE(review): the pointer-cast reads still violate the strict aliasing
// rule (undefined behavior). Left unchanged because it is part of the
// reproducer.
// NOTE(review): "%ld" is used with an unsigned long argument; "%lu" would
// match the argument type.
float floatblah_alsoworks(float in)
{
   // Argument: `in` read through an unsigned long lvalue.
   unsigned long retval = 
byte_manipulation_function_that_happens_to_take_ulongs(*((unsigned 
long*)(&in)));
   // The only difference from the compact broken variant.
   fprintf(stdout, "retval=%ld\n", retval);
   // Result: `retval` read through a float lvalue.
   return *((float*)(&retval));
}

// Select the one you want ...
// Dispatcher: forwards `in` to the variant under test and returns its result.
// To try another variant, replace the call below with one of:
//    floatblah_alsobroken(in)
//    floatblah_works(in)
//    floatblah_alsoworks(in)
float floatblah(float in)
{
   float result = floatblah_broken(in);
   return result;
}

// Little test thingo
// Test driver: passes the float 10 through floatblah() and prints the first
// four bytes of the returned float in hex, lowest address first.
//
// argc and argv are unused. Always returns 0.
int main(int argc, char * argv[])
{
   float myF = 10;
   float retval = floatblah(myF);
   // Inspect the bytes as unsigned char: plain char may be signed, in which
   // case a byte >= 0x80 would be sign-extended and print as ffffffXX.
   const unsigned char * retvalP = (const unsigned char*)(&retval);

   (void)argc;
   (void)argv;

   // %02x keeps leading zeros so the output has the documented
   // "11 00 11 00" shape instead of "11 0 11 0".
   fprintf(stdout, "retval = %02x %02x %02x %02x\n",
           (unsigned)retvalP[0], (unsigned)retvalP[1],
           (unsigned)retvalP[2], (unsigned)retvalP[3]);
   return 0;
}
=========================