Hi All.
In the example below, compiling with -O2 results in incorrect output
from the program. -O seems OK. Am I missing something alignment wise
(or otherwise) or is -O2 breaking my alignment?
If I use _mm_storeu_si128 then both -O2 and -O work as expected.
Any thoughts appreciated.
jp
------
bash-3.1$ gcc -O -msse2 -o sse2 sse2.c
bash-3.1$ ./sse2
c0=2 c1=2
bash-3.1$ gcc -O2 -msse2 -o sse2 sse2.c
bash-3.1$ ./sse2
c0=0 c1=0
bash-3.1$ gcc --version
gcc (GCC) 4.1.2
bash-3.1$ uname -a
Linux puma 2.6.22.8 #1 SMP Tue Sep 25 20:41:25 BST 2007 x86_64 x86_64
x86_64 GNU/Linux
sse2.c:
#include <stdio.h>
#include <emmintrin.h>
void test_int() {
// array of 2 8 byte ints
long int *a = _mm_malloc(16, 16);
long int *b = _mm_malloc(16, 16);
long int *c = _mm_malloc(16, 16);
__m128i ai __attribute__ ((aligned (16)));
__m128i bi __attribute__ ((aligned (16)));
__m128i ci __attribute__ ((aligned (16)));
a[0] = a[1] = 1;
b[0] = b[1] = 1;
c[0] = c[1] = 0;
ai = _mm_load_si128( (__m128i *) (void*)a );
bi = _mm_load_si128( (__m128i *) (void*)b );
ci = _mm_add_epi8( ai, bi );
_mm_store_si128( (__m128i *) (void*)c, ci );
printf("c0=%ld c1=%ld\n", c[0], c[1] );
}
int main( int count, char ** args ) {
test_int();
return 0;
}