I apologize if this is stupidity on my part, but I've investigated this extensively, on the web and in my code. The use of "raw" SSE integer operations (4x32) fails, though float operations work as documented. The error appears to be in the argument type checking to a builtin intrinsic, even though the types are plainly the same. The whole thing is in the attached file. Bill Ackerman wba@xxxxxxxxxxxx wbackerman@xxxxxxxxx
/* Running Windows (sorry!) XP Home, SP2, on Compaq Presario V3000 laptop. This is from a new installation of MinGW 3.4.5, with this compiler version: C:\wba\sd>g++ --version g++ (GCC) 3.4.5 (mingw-vista special r3) Copyright (C) 2004 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Compile with g++ -O4 -msse -Wall t.cpp -o t.exe It works with just the "c = b" statement. But with the "c = __builtin_ia32_paddd(a, b)" statement, it gets this: C:\wba\sd>g++ -O4 -msse -Wall t.cpp -o t.exe t.cpp: In function `int main()': t.cpp:60: error: cannot convert `int __vector__' to `int __vector__' for argument `1' to `int __vector__ __builtin_ia32_paddd(int __vector__, int __vector__)' The floating-point "__builtin_ia32_addps" operation works. */ #include <stdio.h> int main() { typedef int v4si __attribute__ ((mode(V4SI))); typedef float v4sf __attribute__ ((mode(V4SF))); v4si a, b, c; v4sf fa, fb, fc; ((int *) &a)[0] = 3; ((int *) &a)[1] = 4; ((int *) &a)[2] = 5; ((int *) &a)[3] = 6; ((int *) &b)[0] = 6; ((int *) &b)[1] = 7; ((int *) &b)[2] = 8; ((int *) &b)[3] = 9; ((float *) &fa)[0] = 3.0; ((float *) &fa)[1] = 4.0; ((float *) &fa)[2] = 5.0; ((float *) &fa)[3] = 6.0; ((float *) &fb)[0] = 6.0; ((float *) &fb)[1] = 7.0; ((float *) &fb)[2] = 8.0; ((float *) &fb)[3] = 9.0; // c = b; // This works. c = __builtin_ia32_paddd(a, b); // This fails. fc = __builtin_ia32_addps (fa, fb); // This works. printf("Hello, integer SSE world! %d %d %d %d\n", ((int *) &c)[0], ((int *) &c)[1], ((int *) &c)[2], ((int *) &c)[3]); printf("Hello, floating SSE world! %f %f %f %f\n", ((float *) &fc)[0], ((float *) &fc)[1], ((float *) &fc)[2], ((float *) &fc)[3]); return 0; }