Hi,

I have a small C++ test class. Many of its functions are declared inline and are both declared and defined inside the class definition itself. The code refuses to compile without optimization, but compiles and executes correctly with -O{s,1,2,3}. Is this expected?

The code is appended in the mail itself. You only need to look at the last two functions in the vector8s class. The rest can be ignored (it is syntactic sugar).

====================== version info, standard f11 packages ============
~/Documents/numerical/vector8s@rpg> gcc -v
Using built-in specs.
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --enable-plugin --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-1.5.0.0/jre --enable-libgcj-multifile --enable-java-maintainer-mode --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --disable-libjava-multilib --with-ppl --with-cloog --with-tune=generic --with-arch_32=i586 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.4.1 20090725 (Red Hat 4.4.1-2) (GCC)
==============
~/Documents/numerical/vector8s@rpg> uname -a
Linux rpg.linux 2.6.29.6-217.2.16.fc11.x86_64 #1 SMP Mon Aug 24 17:17:40 EDT 2009 x86_64 x86_64 x86_64 GNU/Linux
~/Documents/numerical/vector8s@rpg>
================== compiled with (throws error) ===================
~/Documents/numerical/vector8s@rpg> g++ vector8s.cpp -msse3
vector8s.cpp: In function ‘int main()’:
vector8s.cpp:13: error: no matching function for call to ‘vector8s::first4(long long int __vector__*)’
vector8s.h:100: note: candidates are: void vector8s::first4(int*) const
~/Documents/numerical/vector8s@rpg>
==================== compiled with (compiles successfully) ====================
~/Documents/numerical/vector8s@rpg> g++ vector8s.cpp -msse3 -O3
~/Documents/numerical/vector8s@rpg>
===================== Test code ========== vector8s.cpp ==========
#include"vector8s.h"
using namespace std;

// Test driver: loads eight shorts into a vector8s, prints them, then widens
// the first four lanes into an int array and prints those.
int main()
{
    // Source data; 16-byte aligned because load() uses the aligned
    // intrinsic _mm_load_si128.
    short __attribute__ ((aligned(16))) arr[8]={1,12,31,14,5,61,79,18};
    // a and c are constructed but never used afterwards; b's broadcast
    // value 20 is overwritten by the load() below.
    vector8s a(10);
    vector8s b(20);
    vector8s c;
    b.load((__m128i*)arr);
    cout<<"printing b\n";
    b.print();
    // Destination for four 32-bit values; aligned because first4() stores
    // with _mm_store_si128.
    int __attribute__ ((aligned(16))) arr2[4];
    // This is the only first4() call in the program, i.e. the call the
    // quoted "no matching function" diagnostic refers to.
    b.first4((__m128i*)arr2);
    int i;
    cout<<"printing first 4\n";
    for(i=0; i<4; i++)
        cout<<arr2[i]<<endl;
    return 0;
}
=============== vector8s.h ================
#ifndef VECTOR8S
#define VECTOR8S
#include<pmmintrin.h>
#include<iostream>
// NOTE(review): a using-directive at header scope is imposed on every file
// that includes this header.
using namespace std;

// Thin wrapper around a single __m128i that is treated as eight 16-bit
// lanes. All arithmetic and bitwise operators forward to the matching
// SSE intrinsic and return a new vector8s; the operands are not modified.
class vector8s
{
public:
    // Default: all 128 bits zero.
    inline vector8s()
    {
        data=_mm_setzero_si128();
    }

    // Broadcast x into all eight 16-bit lanes.
    inline vector8s(short x)
    {
        data=_mm_set1_epi16(x);
    }

    // Same as the short overload; x is converted to short before the
    // broadcast.
    inline vector8s(int x)
    {
        data=_mm_set1_epi16(short(x));
    }

    // Wrap an existing register value.
    // NOTE(review): the parameter is a non-const reference although it is
    // only read, so const or temporary __m128i values are not accepted.
    inline vector8s(__m128i &other)
    {
        data=other;
    }

    // Copy constructor.
    // NOTE(review): takes a non-const reference, so this constructor cannot
    // copy from a const vector8s or from a temporary such as the result of
    // one of the operators below; `const vector8s &` would lift that limit.
    inline vector8s(vector8s &other)
    {
        data=other.data;
    }

    // Copy assignment; returns *this.
    vector8s & operator=(const vector8s &other)
    {
        data=other.data;
        return *this;
    }

    // NOTE(review): the binary operators + - & | ^ below all take `other`
    // by non-const reference even though they only read it, so the
    // right-hand operand cannot be a temporary or a const object.

    // Lane-wise 16-bit addition (_mm_add_epi16).
    inline vector8s operator+(vector8s &other) const
    {
        vector8s temp;
        temp.data=_mm_add_epi16(data,other.data);
        return temp;
    }

    // Lane-wise 16-bit subtraction (_mm_sub_epi16).
    inline vector8s operator-(vector8s &other) const
    {
        vector8s temp;
        temp.data=_mm_sub_epi16(data,other.data);
        return temp;
    }

    // Bitwise AND of the full 128 bits.
    inline vector8s operator&(vector8s &other) const
    {
        vector8s temp;
        temp.data=_mm_and_si128(data,other.data);
        return temp;
    }

    // Bitwise OR of the full 128 bits.
    inline vector8s operator|(vector8s &other) const
    {
        vector8s temp;
        temp.data=_mm_or_si128(data,other.data);
        return temp;
    }

    // Shift every 16-bit lane left by `amount` bits (_mm_slli_epi16).
    inline vector8s operator<<(const int amount) const
    {
        vector8s temp;
        temp.data=_mm_slli_epi16(data,amount);
        return temp;
    }

    // Shift every 16-bit lane right by `amount` bits using the logical
    // (zero-filling) shift _mm_srli_epi16, not an arithmetic shift.
    inline vector8s operator>>(const int amount) const
    {
        vector8s temp;
        temp.data=_mm_srli_epi16(data,amount);
        return temp;
    }

    // Bitwise XOR of the full 128 bits.
    inline vector8s operator^(vector8s &other) const
    {
        vector8s temp;
        temp.data=_mm_xor_si128(data,other.data);
        return temp;
    }

    // Write the eight lanes to cout as shorts, one per line. The register
    // is first spilled to an aligned scratch array; endl flushes after
    // every value.
    void print() const
    {
        // The doubled semicolon is just an extra empty statement.
        short __attribute__ ((aligned(16))) pshort[8];;
        _mm_store_si128((__m128i*)pshort, data);
        short i;
        for(i=0; i<8; i++)
            cout<<pshort[i]<<endl;
    }

    // Load 128 bits from ptr with the aligned load intrinsic; ptr must be
    // 16-byte aligned.
    inline void load(const __m128i *ptr)
    {
        data=_mm_load_si128(ptr);
    }

    // Store 128 bits to ptr with the aligned store intrinsic; ptr must be
    // 16-byte aligned.
    inline void store(__m128i *ptr) const
    {
        _mm_store_si128(ptr, data);
    }

    // Load 128 bits from ptr with no alignment requirement.
    inline void load_unaligned(const __m128i *ptr)
    {
        data=_mm_loadu_si128(ptr);
    }

    // Store 128 bits to ptr with no alignment requirement.
    inline void store_unaligned(__m128i *ptr) const
    {
        _mm_storeu_si128(ptr, data);
    }

    // Widen lanes 0..3 to 32 bits and store them to ptr (aligned store).
    // Interleaving with a zero register puts zeros in the upper 16 bits of
    // each 32-bit element, so the values are zero-extended: a negative
    // short is not sign-extended.
    // NOTE(review): the diagnostic quoted earlier in this mail lists the
    // candidate as `first4(int*)`, which does not match the `__m128i *`
    // parameter declared here - confirm this is the header that was
    // actually compiled.
    inline void first4(__m128i *ptr) const
    {
        __m128i temp=_mm_unpacklo_epi16(data,_mm_setzero_si128());
        _mm_store_si128(ptr, temp);
    }

    // Same as first4() but for lanes 4..7 (_mm_unpackhi_epi16).
    inline void last4(__m128i *ptr) const
    {
        __m128i temp=_mm_unpackhi_epi16(data,_mm_setzero_si128());
        _mm_store_si128(ptr, temp);
    }

private:
    // The wrapped register value, explicitly marked 16-byte aligned.
    __m128i data __attribute__ ((aligned(16)));
};
#endif
====================
Thanks,
--
Rohit Garg
http://rpg-314.blogspot.com/
Senior Undergraduate
Department of Physics
Indian Institute of Technology Bombay