Hi all!
I'm having some fun with some network code I'm writing that's trying to
send 4 byte floats through ntohl/htonl. I *think* I've traced my
problems to the way my code interacts with the optimiser in GCC 4.x (GCC
3.3.6 and 2.9.5 appear to be fine).
Here's a little program that demonstrates this; an excessively verbose
version that shows a few other things I tried appears at the end of this
email. It works when compiled with -O1, but -O2 breaks it (it outputs
zeros) and the whole thing goes random with -O3. Adding
-fno-strict-aliasing makes it work again, but compiling with only
-fstrict-aliasing doesn't break it, so it looks like a combination of
things.
========================
#include <stdio.h>
// Should output "retval = 11 00 11 00" if it's working.
// Broken output is either 00 00 00 00 or some random sequence.
// Standin for ntohl/htonl
unsigned long
byte_manipulation_function_that_happens_to_take_ulongs(unsigned long in)
{
    /*
     * Placeholder used instead of ntohl/htonl: the argument is accepted
     * but never inspected, and the same bit pattern comes back on every
     * call.
     */
    const unsigned long fixed_pattern = 0x00110011;

    (void)in;
    return fixed_pattern;
}
/*
 * Feeds the bits of a float through the unsigned-long stand-in function
 * and hands the returned bits back as a float.
 *
 * NOTE(review): this is the reproducer, left as written on purpose.
 * Both conversions dereference a pointer that has been cast to an
 * incompatible type (a float object read as unsigned long, then an
 * unsigned long object read as float). Accessing an object through an
 * incompatible pointer type breaks C's strict-aliasing rule and is
 * undefined behaviour, so the optimiser is free to reorder or drop the
 * accesses. The defined way to move the bits is memcpy (or a union).
 * On a platform where unsigned long is wider than float, the first read
 * would additionally run past the end of `in`.
 */
float floatblah(float in)
{
/* Read the storage of `in` as an unsigned long and pass that value on. */
unsigned long retval =
byte_manipulation_function_that_happens_to_take_ulongs(*((unsigned
long*)(&in)));
/* Read the storage of `retval` back as a float. */
return *((float*)(&retval));
}
/*
 * Test driver: runs floatblah() on the value 10 and dumps the first four
 * bytes of the returned float in hex, in memory order.
 *
 * Returns 0 unconditionally.
 */
int main(int argc, char * argv[])
{
    float myF = 10;
    float retval = floatblah(myF);

    /*
     * Inspect the result through unsigned char: plain char may be signed,
     * in which case any byte >= 0x80 would be sign-extended when promoted
     * and print as ffffffXX.
     */
    const unsigned char *bytes = (const unsigned char *)&retval;

    /* The command line is not used. */
    (void)argc;
    (void)argv;

    /* %02x zero-pads each byte so the output has the documented
     * "11 00 11 00" shape instead of "11 0 11 0". */
    fprintf(stdout, "retval = %02x %02x %02x %02x\n",
            (unsigned)bytes[0], (unsigned)bytes[1],
            (unsigned)bytes[2], (unsigned)bytes[3]);
    return 0;
}
========================
I've found mention of issues with strict aliasing in the list archives,
often associated with pointer arithmetic and of course the man page
itself has an example with unions but I've not been able to find any
other mention of problems where only casts are involved ...
I'd appreciate any advice you people could offer on this matter!
In particular, the casting around in the "floatblah" function looks
somewhat messy (although, as I understand it, it's still valid C) but
short of memcpy'ing stuff around, is there really any other way of doing
it efficiently that the compiler won't boil down to the same thing
anyway? (I've got an alternative that splits this all up at the end
which also suffers from this problem.)
Cheers!
- Raymond
Excessively verbose version:
=========================
#include <stdio.h>
#include <string.h>
// Simple test to demonstrate an issue I'm having with compiler optimizations
// under GCC 4.1 and above (GCC 3.3.6 and 2.9.5 don't have this problem even
// at -O3).
// I compiled this with "gcc -O1 test.c -o test; ./test" and compared the
// output to "gcc -O2 test.c -o test; ./test". The last line SHOULD read
// "11 00 11 00" (the value from the function below) but the version with -O2
// will read "00 00 00 00".
// It gets even more random with -O3. See comments below for more details.
// A simple function in lieu of the system htonl or ntohl functions (to
// demonstrate that this problem has nothing to do with that system function).
// To see a more realistic real-world problem (ie. the problem I initially
// had!) add "#include <arpa/inet.h>" and replace occurrences of
// "byte_manipulation_function_that_happens_to_take_ulongs" with "ntohl" or
// "htonl".
// The output result (with a float of 10 as the input) should be 41 20 00 00.
unsigned long
byte_manipulation_function_that_happens_to_take_ulongs(unsigned long in)
{
    /*
     * Used in lieu of the system htonl/ntohl: whatever is passed in is
     * ignored and one constant bit pattern is always returned.
     */
    const unsigned long canned_result = 0x00110011;

    (void)in;
    return canned_result;
}
// This works with -O1 and doesn't work with -O2. It works with
// -O2 -fno-strict-aliasing BUT if I just compile with -fstrict-aliasing it
// also works so it looks like it's a combination of things working together
// that break this (actually, under 4.1.3 if I throw in ALL the flags that the
// man page says are in -O1 and -O2 it also works so there's something ELSE in
// play here as well I think ...)
/*
 * Step-by-step reproducer: reinterprets the bits of `in` as an unsigned
 * long, passes them to the stand-in function, and reinterprets the
 * returned unsigned long as a float. Every intermediate pointer gets its
 * own variable and is routed through void*.
 *
 * NOTE(review): the type puns are kept deliberately, since they are what
 * this function exists to demonstrate. Reading a float object through an
 * unsigned long* (and an unsigned long object through a float*) violates
 * the strict-aliasing rule and is undefined behaviour; going through
 * void* does not change that. Use memcpy or a union in real code.
 *
 * The only change from the posted text is the first assignment, which
 * had been garbled into a single non-ASCII character by an HTML-entity
 * decode of "&in;" and no longer compiled.
 */
float floatblah_broken(float in)
{
    float * inP;
    void * inV;
    unsigned long * inuP;
    unsigned long inu;
    unsigned long retval;
    unsigned long * retvalP;
    void * retvalV;
    float * retvalfP;
    float retvalf;

    /* float -> unsigned long, by pointer cast (undefined behaviour). */
    inP = &in;
    inV = (void*)inP;
    inuP = (unsigned long*)inV;
    inu = *inuP;

    retval = byte_manipulation_function_that_happens_to_take_ulongs(inu);

    /* unsigned long -> float, by pointer cast (undefined behaviour). */
    retvalP = &retval;
    retvalV = (void*)retvalP;
    retvalfP = (float*)retvalV;
    retvalf = *retvalfP;
    return retvalf;
}
// The above, all smashed together and without void casts.
// This also works with -O1 and not -O2 but can be made to break with
// -fschedule-insns -fstrict-aliasing and can be made to break in even more
// interesting ways when these two options are used in combination with
// -fexpensive-optimizations !
/*
 * Compact reproducer: reads `in` as an unsigned long, passes it to the
 * stand-in function, and reads the returned value back as a float, all
 * with direct pointer casts.
 *
 * NOTE(review): left as written on purpose. Dereferencing a pointer cast
 * to an incompatible type violates the strict-aliasing rule and is
 * undefined behaviour, so the result depends on what the optimiser does
 * with it. Use memcpy or a union to move the bits in real code.
 */
float floatblah_alsobroken(float in)
{
/* Storage of `in` read as unsigned long. */
unsigned long retval =
byte_manipulation_function_that_happens_to_take_ulongs(*((unsigned
long*)(&in)));
/* Storage of `retval` read as float. */
return *((float*)(&retval));
}
// These two work regardless - I'm guessing the printf forces the optimizer
// to sit up and take notice! Memcpy'ing the memory around instead of using
// assignments also works.
/*
 * Same step-by-step conversion as the broken variant, with one extra
 * fprintf of the intermediate unsigned long before it is reinterpreted
 * as a float.
 *
 * NOTE(review): the pointer-cast type puns are kept deliberately for the
 * comparison. They still violate the strict-aliasing rule and are
 * undefined behaviour, so "works" here is an observation, not a
 * guarantee. Use memcpy or a union in real code.
 *
 * Two incidental fixes: the first assignment had been garbled into a
 * single non-ASCII character by an HTML-entity decode of "&in;", and
 * the trace line now uses %lu to match the unsigned long argument.
 */
float floatblah_works(float in)
{
    float * inP;
    void * inV;
    unsigned long * inuP;
    unsigned long inu;
    unsigned long retval;
    unsigned long * retvalP;
    void * retvalV;
    float * retvalfP;
    float retvalf;

    /* float -> unsigned long, by pointer cast (undefined behaviour). */
    inP = &in;
    inV = (void*)inP;
    inuP = (unsigned long*)inV;
    inu = *inuP;

    retval = byte_manipulation_function_that_happens_to_take_ulongs(inu);

    /* Printing the value here is the only difference from the broken variant. */
    fprintf(stdout, "retval=%lu\n", retval);

    /* unsigned long -> float, by pointer cast (undefined behaviour). */
    retvalP = &retval;
    retvalV = (void*)retvalP;
    retvalfP = (float*)retvalV;
    retvalf = *retvalfP;
    return retvalf;
}
/*
 * Compact conversion with an extra fprintf of the intermediate unsigned
 * long before it is reinterpreted as a float.
 *
 * NOTE(review): the pointer-cast type puns are kept deliberately for the
 * comparison. They still violate the strict-aliasing rule and are
 * undefined behaviour, so "works" is an observation, not a guarantee.
 * Use memcpy or a union in real code.
 *
 * The trace line uses %lu so the conversion specifier matches the
 * unsigned long argument (the posted code used %ld).
 */
float floatblah_alsoworks(float in)
{
    /* Storage of `in` read as unsigned long (undefined behaviour). */
    unsigned long retval =
        byte_manipulation_function_that_happens_to_take_ulongs(*((unsigned
        long*)(&in)));

    fprintf(stdout, "retval=%lu\n", retval);

    /* Storage of `retval` read as float (undefined behaviour). */
    return *((float*)(&retval));
}
// Select the one you want ...
/*
 * Selector: forwards to exactly one of the conversion variants. To try
 * another one, comment out the live line and uncomment the one you want.
 */
float floatblah(float in)
{
    float chosen = floatblah_broken(in);
    // float chosen = floatblah_alsobroken(in);
    // float chosen = floatblah_works(in);
    // float chosen = floatblah_alsoworks(in);

    return chosen;
}
// Little test thingo
/*
 * Test driver: runs the selected floatblah() variant on the value 10 and
 * dumps the first four bytes of the returned float in hex, in memory
 * order.
 *
 * Returns 0 unconditionally.
 */
int main(int argc, char * argv[])
{
    float myF = 10;
    float retval = floatblah(myF);

    /*
     * Inspect the result through unsigned char: plain char may be signed,
     * in which case any byte >= 0x80 would be sign-extended when promoted
     * and print as ffffffXX.
     */
    const unsigned char *bytes = (const unsigned char *)&retval;

    /* The command line is not used. */
    (void)argc;
    (void)argv;

    /* %02x zero-pads each byte so the output has the documented
     * "11 00 11 00" shape instead of "11 0 11 0". */
    fprintf(stdout, "retval = %02x %02x %02x %02x\n",
            (unsigned)bytes[0], (unsigned)bytes[1],
            (unsigned)bytes[2], (unsigned)bytes[3]);
    return 0;
}
=========================