/**
 * Why are vectors so much slower than plain old loops?  Shouldn't
 * they be faster?  Do I have to actually call the built-in MMX and
 * SSE instructions myself?  Shouldn't the compiler be able to do this
 * given this much information?
 *
 * Results compiled on Intel(R) Core(TM)2 Duo CPU E8500 @ 3.16GHz
 * Using: "-O3 -march=core2"
 *
 * $ gcc -v
 * gcc version 4.3.0 20080428 (Red Hat 4.3.0-8) (GCC)
 *
 * $ time ./test 1000000
 *
 * real 0m3.639s
 * user 0m3.634s
 * sys  0m0.001s
 * $ time ./test 1000000 b
 *
 * real 0m9.160s
 * user 0m9.148s
 * sys  0m0.002s
 *
 * Please correct what I'm doing wrong here to make the vector
 * version faster.
 *
 * Note: if you make SIZE smaller it only gets worse!
 *
 * Review notes (the timings above were measured before these changes):
 *  - The buffers now live in unions with the vector type, so the
 *    vector path no longer dereferences char arrays through casted,
 *    possibly misaligned vector pointers.
 *  - The inputs are initialized before use and a compiler barrier keeps
 *    the optimizer from deleting or hoisting the work being timed.
 *  - The loop count is parsed with strtol and checked.
 *  - NOTE(review): a SIZE-byte vector is far wider than a 16-byte SSE
 *    register.  The GCC manual says vector types the target cannot
 *    handle natively are synthesized from narrower operations, which is
 *    a likely reason for the slowdown.  Processing vector_size (16)
 *    chunks in a loop is worth trying -- not measured here.
 **/
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define SIZE 2048
#define DEFAULT_LOOPS 1000000

typedef char vSIZEqi __attribute__ ((vector_size (SIZE)));

/* One input operand, viewable either as a single SIZE-byte vector or as
   SIZE individual chars.  Declaring the storage with the vector type
   gives it whatever alignment that type requires.  */
union lane_buffer
{
  vSIZEqi vec;
  char bytes[SIZE];
};

/* The four result rows, with the same two views as lane_buffer.  */
union result_buffer
{
  vSIZEqi vec[4];
  char bytes[4][SIZE];
};

/* All benchmark storage grouped together so that a single pointer can
   be handed to clobber_memory.  */
struct bench_data
{
  union lane_buffer a, b, c, d, e;
  union result_buffer out;
};

/* Scalar reference implementation.  For every index i it computes
   g = a*b and h = a*c, then stores d*g, e*g, d*h and e*h into rows
   0..3 of OUT.  Products are formed in int and narrowed back to char,
   exactly as the implicit conversions did before.  */
static void
loop_method (char out[4][SIZE], const char a[SIZE], const char b[SIZE],
             const char c[SIZE], const char d[SIZE], const char e[SIZE])
{
  int i;

  for (i = 0; i < SIZE; i++)
    {
      char g = (char) (a[i] * b[i]);
      char h = (char) (a[i] * c[i]);

      out[0][i] = (char) (d[i] * g);
      out[1][i] = (char) (e[i] * g);
      out[2][i] = (char) (d[i] * h);
      out[3][i] = (char) (e[i] * h);
    }
}

/* Same computation as loop_method, expressed as whole-vector
   element-wise multiplications on SIZE-byte GCC vectors.  */
static void
vector_method (vSIZEqi out[4], const vSIZEqi *a, const vSIZEqi *b,
               const vSIZEqi *c, const vSIZEqi *d, const vSIZEqi *e)
{
  vSIZEqi g = *a * *b;
  vSIZEqi h = *a * *c;

  out[0] = *d * g;
  out[1] = *e * g;
  out[2] = *d * h;
  out[3] = *e * h;
}

/* Compiler-only barrier: the empty asm receives P and declares a memory
   clobber, so the compiler must assume the pointed-to data was read and
   modified.  This stops it from discarding the stores into the result
   buffer or from computing the loop-invariant result only once.  */
static void
clobber_memory (void *p)
{
  __asm__ __volatile__ ("" : : "r" (p) : "memory");
}

/* Fill the five input operands with a fixed pattern.  Every value is
   masked to 0..127 so it fits in char whether char is signed or not.  */
static void
fill_inputs (struct bench_data *data)
{
  int i;

  for (i = 0; i < SIZE; i++)
    {
      data->a.bytes[i] = (char) ((i + 1) & 0x7f);
      data->b.bytes[i] = (char) ((3 * i + 2) & 0x7f);
      data->c.bytes[i] = (char) ((5 * i + 3) & 0x7f);
      data->d.bytes[i] = (char) ((7 * i + 4) & 0x7f);
      data->e.bytes[i] = (char) ((11 * i + 5) & 0x7f);
    }
}

/* Parse TEXT as a decimal iteration count.  As with the previous
   sscanf ("%d") call, leading whitespace and a sign are accepted and
   trailing characters are ignored.  If no number is found, or it does
   not fit in int, a warning goes to stderr and FALLBACK is returned.  */
static int
parse_loops (const char *text, int fallback)
{
  char *end;
  long value;

  errno = 0;
  value = strtol (text, &end, 10);
  if (end == text || errno == ERANGE || value < INT_MIN || value > INT_MAX)
    {
      fprintf (stderr, "invalid loop count '%s'; using %d\n", text, fallback);
      return fallback;
    }

  return (int) value;
}

/* Usage: test [LOOPS [ANYTHING]]
   Runs the scalar method LOOPS times (default DEFAULT_LOOPS); when a
   second argument is present the vector method is run instead.  */
int
main (int argc, char *argv[])
{
  /* Static storage: keeps about 18 KiB of over-aligned vector data off
     the stack.  */
  static struct bench_data data;
  int loops = DEFAULT_LOOPS;
  int i;

  if (argc > 1)
    loops = parse_loops (argv[1], DEFAULT_LOOPS);

  fill_inputs (&data);

  for (i = 0; i < loops; i++)
    {
      if (argc > 2)
        vector_method (data.out.vec, &data.a.vec, &data.b.vec, &data.c.vec,
                       &data.d.vec, &data.e.vec);
      else
        loop_method (data.out.bytes, data.a.bytes, data.b.bytes,
                     data.c.bytes, data.d.bytes, data.e.bytes);

      /* Make each iteration's result observable to the compiler.  */
      clobber_memory (&data);
    }

  return 0;
}