/*
 * v.c -- sum a vector of integers with a plain scalar loop (SISD) and with
 * GCC vector extensions (SIMD), and compare the running time of the two.
 *
 * Original question
 * -----------------
 * Hi, the following program doesn't give me a speed-up for using SIMD.
 * The program sums a vector of integers. If I take out the line 'q=p',
 * the SIMD version runs faster, but is not correct. I am on Fedora Core 5
 * running gcc: gcc version 4.1.0 20060304 (Red Hat 4.1.0-3).
 * Thanks in advance, jack
 *
 *   $ gcc -O3 -mmmx -msse v.c
 *   $ time ./a.out 100000 100 0
 *   SISD
 *   0
 *   real 0m0.174s   user 0m0.172s   sys 0m0.004s
 *   $ time ./a.out 100000 100 1
 *   SIMD
 *   0
 *   real 0m1.017s   user 0m1.016s   sys 0m0.000s
 *
 * Review notes
 * ------------
 * - -msse only enables the original SSE instructions, which operate on
 *   packed single-precision floats.  Packed integer addition on 128-bit
 *   registers was introduced with SSE2, so build with -msse2 (or
 *   -march=native).  When the target has no instruction for a vector
 *   operation, GCC synthesizes it from narrower operations, which is the
 *   likely reason the "SIMD" path was slower than the scalar loop.
 * - Without 'q = p' the accumulator is never updated: the result is wrong
 *   and the loop body has no observable effect, so the optimizer may
 *   remove it -- presumably why that variant looked fast.
 * - v4si holds four elements of type I (long).  Where long is 64 bits the
 *   vector is 32 bytes, wider than one SSE register; a 32-bit element type
 *   is needed to fit four lanes into 128 bits.
 * - The input buffer is now zero-initialised (it used to be read
 *   uninitialised), so the printed sum is still 0, as in the transcript.
 *
 * Usage: ./a.out ITERATIONS QUADS MODE
 *   ITERATIONS  how many times the sum is recomputed
 *   QUADS       vector length divided by four
 *   MODE        0 selects the scalar loop, non-zero the vector loop
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define IZ sizeof(I)
typedef long I;
typedef char C;
typedef long v4si __attribute__ ((vector_size(IZ * 4)));

/*
 * Sum the first n elements of is, four lanes at a time.
 *
 * n   number of elements to add; values <= 0 yield 0
 * is  pointer to at least n readable elements (no alignment requirement,
 *     data is moved with memcpy)
 *
 * Returns the sum as an I.  Elements left over when n is not a multiple
 * of four are added by a scalar tail loop instead of being read past the
 * end of the buffer.
 */
I simd(I n, const I *is)
{
    v4si acc;
    v4si chunk;
    I lanes[4];
    I total;
    I i = 0;

    memset(&acc, 0, sizeof acc);

    /* "n - i >= 4" rather than "i + 4 <= n" so the test cannot overflow. */
    for (; n - i >= 4; i += 4) {
        memcpy(&chunk, is + i, sizeof chunk);
        acc = acc + chunk;
    }

    /* Copy the lanes out instead of aliasing the vector through an I *. */
    memcpy(lanes, &acc, sizeof lanes);
    total = lanes[0] + lanes[1] + lanes[2] + lanes[3];

    for (; i < n; i++)
        total += is[i];

    return total;
}

/*
 * Sum the first n elements of is with a plain scalar loop.
 *
 * n   number of elements to add; values <= 0 yield 0
 * is  pointer to at least n readable elements
 *
 * Returns the sum as an I.
 */
I sisd(I n, const I *is)
{
    I total = 0;
    I i;

    for (i = 0; i < n; i++)
        total += is[i];

    return total;
}

/*
 * Parse a complete base-10 integer from s into *out.
 * Returns 0 on success, -1 if s is empty, has trailing characters, or is
 * out of range for long.
 */
static int parse_arg(const char *s, I *out)
{
    char *end;
    long val;

    errno = 0;
    val = strtol(s, &end, 10);
    if (end == s || *end != '\0' || errno == ERANGE)
        return -1;

    *out = val;
    return 0;
}

/*
 * Entry point: prints the selected mode, recomputes the sum ITERATIONS
 * times over a zero-filled buffer of 4 * QUADS elements, then prints the
 * last result.  Returns EXIT_FAILURE on bad arguments or allocation
 * failure.
 */
int main(int c, C **v)
{
    I n, z, m;
    I result = 0;   /* defined output even when ITERATIONS is 0 */
    I *is;

    if (c < 4) {
        fprintf(stderr, "usage: %s ITERATIONS QUADS MODE\n",
                c > 0 ? v[0] : "a.out");
        return EXIT_FAILURE;
    }

    if (parse_arg(v[1], &n) != 0 || parse_arg(v[2], &z) != 0 ||
        parse_arg(v[3], &m) != 0) {
        fprintf(stderr, "%s: arguments must be integers\n", v[0]);
        return EXIT_FAILURE;
    }

    /* Reject sizes for which 4 * z elements would overflow the byte count. */
    if (z < 0 || (size_t)z > (size_t)-1 / (4 * IZ)) {
        fprintf(stderr, "%s: invalid vector size\n", v[0]);
        return EXIT_FAILURE;
    }
    z *= 4;

    /* calloc zero-fills, so the values being summed are well defined. */
    is = calloc((size_t)z, sizeof *is);
    if (is == NULL && z > 0) {
        fprintf(stderr, "%s: out of memory\n", v[0]);
        return EXIT_FAILURE;
    }

    puts(m ? "SIMD" : "SISD");

    while (n-- > 0)
        result = m ? simd(z, is) : sisd(z, is);

    printf("%ld\n", result);

    free(is);
    return 0;
}