[KVM-autotest][PATCH 1/2] Virt test:Add new instruction sets

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



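Add tests for the fma4, sse4a and xop instruction sets. Test functions now
return an int status so that cpuflags-test can report failures.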

Signed-off-by: Jiří Župka <jzupka@xxxxxxxxxx>
---
 client/virt/deps/test_cpu_flags/Makefile        |   83 +++++++++++------------
 client/virt/deps/test_cpu_flags/aes.c           |   13 +++-
 client/virt/deps/test_cpu_flags/avx.c           |   30 +++++----
 client/virt/deps/test_cpu_flags/cpuflags-test.c |   57 +++++++++++-----
 client/virt/deps/test_cpu_flags/fma4.c          |   31 +++++++++
 client/virt/deps/test_cpu_flags/pclmul.c        |   10 ++-
 client/virt/deps/test_cpu_flags/rdrand.c        |    6 +-
 client/virt/deps/test_cpu_flags/sse3.c          |   12 +++-
 client/virt/deps/test_cpu_flags/sse4.c          |   25 ++++++--
 client/virt/deps/test_cpu_flags/sse4a.c         |   37 ++++++++++
 client/virt/deps/test_cpu_flags/ssse3.c         |   14 +++-
 client/virt/deps/test_cpu_flags/stress.c        |    6 ++
 client/virt/deps/test_cpu_flags/tests.h         |   38 ++++++++--
 client/virt/deps/test_cpu_flags/xop.c           |   48 +++++++++++++
 14 files changed, 309 insertions(+), 101 deletions(-)
 create mode 100644 client/virt/deps/test_cpu_flags/fma4.c
 create mode 100644 client/virt/deps/test_cpu_flags/sse4a.c
 create mode 100644 client/virt/deps/test_cpu_flags/xop.c

diff --git a/client/virt/deps/test_cpu_flags/Makefile b/client/virt/deps/test_cpu_flags/Makefile
index 5b77740..b95c36e 100644
--- a/client/virt/deps/test_cpu_flags/Makefile
+++ b/client/virt/deps/test_cpu_flags/Makefile
@@ -3,57 +3,42 @@ MKDIR = mkdir -p
 OPTFLAGS=-O3
 
 CFLAGS= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
+	-ftree-vectorize \
 	-ffast-math \
 	-fopenmp \
 
-CFLAGSAVX= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
-	-mavx \
-	-fopenmp \
+CFLAGSAVX= ${CFLAGS} \
+		-mavx \
 
-CFLAGSSSE4= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
+CFLAGSFMA4= ${CFLAGS} \
+	-mfma4 \
+
+CFLAGSSSE4A= ${CFLAGS} \
+	-msse4a \
+
+CFLAGSSSE4= ${CFLAGS} \
 	-msse4 -msse4.1 -msse4.2 \
-	-fopenmp \
 
-CFLAGSSSSE3= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
+CFLAGSSSSE3= ${CFLAGS} \
 	-mssse3 \
-	-fopenmp \
 
-CFLAGSSSE3= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
+CFLAGSSSE3= ${CFLAGS} \
 	-msse3 \
-	-fopenmp \
 
-CFLAGSAES= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
+CFLAGSAES= ${CFLAGS} \
 	-maes \
-	-fopenmp \
 
-CFLAGSPCLMUL= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
+CFLAGSPCLMUL= ${CFLAGS} \
 	-mpclmul \
-	-fopenmp \
 
-CFLAGSRDRAND= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
+CFLAGSRDRAND= ${CFLAGS} \
 	-mrdrnd \
-	-fopenmp \
 
-CFLAGSSTRESS= -m64 ${OPTFLAGS} -std=c99 -pipe \
-	-ftree-vectorize -ftree-vectorizer-verbose=0 \
-	-ffast-math \
+CFLAGSXOP= ${CFLAGS} \
+	-mxop \
+
+CFLAGSSTRESS= ${CFLAGS} \
 	$(EXTRA_FLAGS) \
-	-fopenmp \
 
 CXX=g++
 CC=gcc
@@ -66,39 +51,51 @@ default:cpuflags-test
 
 all:cpuflags-test
 
-cpuflags-test: avx.o sse4.o ssse3.o sse3.o aes.o pclmul.o rdrand.o stress.o
+cpuflags-test: cpuflags-test.c tests.h avx.o fma4.o xop.o sse4a.o sse4.o ssse3.o sse3.o aes.o pclmul.o rdrand.o stress.o # relink when the driver or header changes
 	$(CC) $(CFLAGS) $(LIBS) cpuflags-test.c -o cpuflags-test \
 		aes.o \
 		pclmul.o \
 		rdrand.o \
 		avx.o \
+		fma4.o \
+		xop.o \
+		sse4a.o \
 		sse4.o \
 		ssse3.o \
 		sse3.o \
 		stress.o \
 
-aes.o: aes.c
+aes.o: aes.c tests.h
 	$(CC) $(CFLAGSAES) $(LIBS) -c aes.c
 
-pclmul.o: pclmul.c
+pclmul.o: pclmul.c tests.h
 	$(CC) $(CFLAGSPCLMUL) $(LIBS) -c pclmul.c
 
-rdrand.o: rdrand.c
+rdrand.o: rdrand.c tests.h
 	$(CC) $(CFLAGSRDRAND) $(LIBS) -c rdrand.c
 
-avx.o: avx.c
+fma4.o: fma4.c tests.h
+	$(CC) $(CFLAGSFMA4) $(LIBS) -c fma4.c
+
+xop.o: xop.c tests.h
+	$(CC) $(CFLAGSXOP) $(LIBS) -c xop.c
+
+avx.o: avx.c tests.h
 	$(CC) $(CFLAGSAVX) $(LIBS) -c avx.c
 
-sse4.o: sse4.c
+sse4a.o: sse4a.c tests.h
+	$(CC) $(CFLAGSSSE4A) $(LIBS) -c sse4a.c
+
+sse4.o: sse4.c tests.h
 	$(CC) $(CFLAGSSSE4) $(LIBS) -c sse4.c
 
-ssse3.o: ssse3.c
+ssse3.o: ssse3.c tests.h
 	$(CC) $(CFLAGSSSSE3) $(LIBS) -c ssse3.c
 
-sse3.o: sse3.c
+sse3.o: sse3.c tests.h
 	$(CC) $(CFLAGSSSE3) $(LIBS) -c sse3.c
 
-stress.o: stress.c
+stress.o: stress.c tests.h
 	$(CC) $(CFLAGSSTRESS) $(LIBS) -c stress.c
 
 ARCHIVE= cpuflags-test
diff --git a/client/virt/deps/test_cpu_flags/aes.c b/client/virt/deps/test_cpu_flags/aes.c
index b8dc5cc..7132ec7 100644
--- a/client/virt/deps/test_cpu_flags/aes.c
+++ b/client/virt/deps/test_cpu_flags/aes.c
@@ -7,8 +7,10 @@
 
 #include "tests.h"
 
+#define result (5931894172722287318L)
+
 #ifdef __AES__
-void aes(){
+int aes(){
 	__ma128i v1;
 	__ma128i v2;
 	for (int i = 1;i >= 0; i--){
@@ -17,10 +19,15 @@ void aes(){
 	}
 	__ma128i v3;
 	v3.i = _mm_aesdeclast_si128(v1.i, v2.i);
-	printf("[%d %d %d]\n",v1.ui64[0],v2.ui64[0],v3.ui64[0]);
+	if (v3.ui64[0] != result){
+		printf("Correct: %ld result: %ld\n", result, v3.ui64[0]);
+		return -1;
+	}
+	return 0;
 }
 #else
-void aes(){
+int aes(){
 	printf("AES is not supported.");
+	return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/avx.c b/client/virt/deps/test_cpu_flags/avx.c
index bf06929..179c51b 100644
--- a/client/virt/deps/test_cpu_flags/avx.c
+++ b/client/virt/deps/test_cpu_flags/avx.c
@@ -7,15 +7,8 @@
 #include "tests.h"
 
 #ifdef __AVX__
-
-typedef union __attribute__ ((aligned(32))){
-	__m256 v;
-	float f32[8];
-} __mar256;
-
-
-void avx(){
-	__mar256 a,b;
+int avx(){
+	__ma256 a,b,c;
 
 	__m256 ymm0;
 	__m256 ymm1;
@@ -27,17 +20,26 @@ void avx(){
 
 	ymm0 = _mm256_load_ps(a.f32);
 	ymm1 = _mm256_load_ps(b.f32);
-	__mar256 ymm3;
-	ymm3.v = _mm256_sub_ps(ymm0,ymm1);
-	_mm256_store_ps(b.f32, ymm3.v );
+	__ma256 ymm3;
+	ymm3.f = _mm256_sub_ps(ymm0,ymm1);
+	_mm256_store_ps(c.f32, ymm3.f);
 	for (int i = 0;i < 8; i++){
-		printf("[%f]\n", b.f32[i]);
+		if (fabsf((a.f32[i] - b.f32[i]) - c.f32[i]) > FLT_EPSILON){ /* two-sided tolerance check */
+			printf("Wrong result:\n");
+			for (int j = 0;j < 8; j++){
+				printf("Correct: %f result: %f\n", a.f32[j] - b.f32[j],
+						c.f32[j]);
+			}
+			return -1;
+		}
 	}
+	return 0;
 }
 
 #endif
 #ifndef __AVX__
-void avx(){
+int avx(){
 	printf("AVX is not supported.");
+	return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/cpuflags-test.c b/client/virt/deps/test_cpu_flags/cpuflags-test.c
index 483561c..0d7200e 100644
--- a/client/virt/deps/test_cpu_flags/cpuflags-test.c
+++ b/client/virt/deps/test_cpu_flags/cpuflags-test.c
@@ -5,12 +5,16 @@
 
 void print_help(){
 	printf(
-			"  --sse4                     test sse4 instruction.\n"
+			"  --sse3                     test sse3 instruction.\n"
 			"  --ssse3                    test ssse3 instruction.\n"
+			"  --sse4                     test sse4 instruction.\n"
+			"  --sse4a                    test sse4a instruction.\n"
 			"  --avx                      test avx instruction.\n"
 			"  --aes                      test aes instruction.\n"
 			"  --pclmul                   test carry less multiplication.\n"
 			"  --rdrand                   test rdrand instruction.\n"
+			"  --fma4                     test fma4 instruction.\n"
+			"  --xop                      test xop instruction.\n"
 			"  --stress n_cpus,avx,aes    start stress on n_cpus.and cpuflags\n");
 }
 
@@ -35,6 +39,9 @@ inst parse_Inst(char * optarg){
 		else if(strcmp(pch,"sse4") == 0){
 			i.sse4 = 1;
 		}
+		else if(strcmp(pch,"sse4a") == 0){
+			i.sse4a = 1;
+		}
 		else if(strcmp(pch,"avx") == 0){
 			i.avx = 1;
 		}
@@ -47,6 +54,12 @@ inst parse_Inst(char * optarg){
 		else if(strcmp(pch,"rdrand") == 0){
 			i.rdrand = 1;
 		}
+		else if(strcmp(pch,"fma4") == 0){
+			i.fma4 = 1;
+		}
+		else if(strcmp(pch,"xop") == 0){
+			i.xop = 1;
+		}
 		pch = strtok (NULL, ",");
 	}
 	return i;
@@ -57,18 +70,22 @@ int main(int argc, char **argv) {
 	int digit_optind = 0;
 	int opt_count = 0;
 
+	int ret = 0;
 	while (1) {
 		int this_option_optind = optind ? optind : 1;
 		int option_index = 0;
 		static struct option long_options[] =
-				{{ "sse3",  no_argument, 0, 0 },
+				{{ "stress",required_argument, 0, 0 },
+				{ "sse3",   no_argument, 0, 0 },
 				{ "ssse3",  no_argument, 0, 0 },
 				{ "sse4",   no_argument, 0, 0 },
+				{ "sse4a",  no_argument, 0, 0 },
 				{ "avx",    no_argument, 0, 0 },
 				{ "aes",    no_argument, 0, 0 },
 				{ "pclmul", no_argument, 0, 0 },
 				{ "rdrand", no_argument, 0, 0 },
-				{ "stress", required_argument, 0, 0 },
+				{ "fma4",   no_argument, 0, 0 },
+				{ "xop",    no_argument, 0, 0 },
 				{ 0, 0, 0, 0}};
 
 		c = getopt_long(argc, argv, "", long_options, &option_index);
@@ -80,37 +97,41 @@ int main(int argc, char **argv) {
 
 		switch (c) {
 		case 0:
-			printf("option %s", long_options[option_index].name);
-			if (optarg)
-				printf(" with arg %s", optarg);
-			printf("\n");
 			switch (option_index) {
 			case 0:
-				sse3();
+				stress(parse_Inst(optarg));
 				break;
 			case 1:
-				ssse3();
+				ret += (sse3() != 0); /* count failures: tests return -1 or 1 on error */
 				break;
 			case 2:
-				sse4();
+				ret += (ssse3() != 0);
 				break;
 			case 3:
-				avx();
+				ret += (sse4() != 0);
 				break;
 			case 4:
-				aes();
+				ret += (sse4a() != 0);
 				break;
 			case 5:
-				pclmul();
+				ret += (avx() != 0);
 				break;
 			case 6:
-				rdrand();
+				ret += (aes() != 0);
 				break;
 			case 7:
-				stress(parse_Inst(optarg));
+				ret += (pclmul() != 0);
+				break;
+			case 8:
+				ret += (rdrand() != 0);
+				break;
+			case 9:
+				ret += (fma4() != 0);
+				break;
+			case 10:
+				ret += (xop() != 0);
 				break;
 			}
-			printf("\n");
 			break;
 
 		case '?':
@@ -123,5 +144,9 @@ int main(int argc, char **argv) {
 		}
 		opt_count += 1;
 	}
+	if (ret > 0) {
+		printf("%d test fail.\n", ret);
+		exit(-1);
+	}
 	exit(0);
 }
diff --git a/client/virt/deps/test_cpu_flags/fma4.c b/client/virt/deps/test_cpu_flags/fma4.c
new file mode 100644
index 0000000..48739e1
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/fma4.c
@@ -0,0 +1,31 @@
+/*
+ * fma4.c
+ *
+ *  Created on: Nov 29, 2011
+ *      Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __FMA4__
+
+int fma4(){
+	__ma256 a, b, c, d;
+	int i;
+	for (i = 0; i < 4; i++) {
+		a.d64[i] = i;
+		b.d64[i] = 2.;
+		c.d64[i] = 3.;
+	}
+	d.d = _mm256_macc_pd(a.d, b.d, c.d);
+	for (i = 0; i < 4; i++) printf(" %.3lf", d.d64[i]);
+	printf("\n");
+	return 0;
+}
+
+#endif
+#ifndef __FMA4__
+int fma4(){
+	printf("FMA4 is not supported.");
+	return 0;
+}
+#endif
diff --git a/client/virt/deps/test_cpu_flags/pclmul.c b/client/virt/deps/test_cpu_flags/pclmul.c
index 3387a17..1877e8b 100644
--- a/client/virt/deps/test_cpu_flags/pclmul.c
+++ b/client/virt/deps/test_cpu_flags/pclmul.c
@@ -8,7 +8,7 @@
 #include "tests.h"
 
 #ifdef __PCLMUL__
-void pclmul(){
+int pclmul(){
 	__ma128i v1;
 	__ma128i v2;
 	for (int i = 1;i >= 0; i--){
@@ -17,10 +17,14 @@ void pclmul(){
 	}
 	__ma128i v3;
 	v3.i = _mm_clmulepi64_si128(v1.i, v2.i, 0);
-	printf("[%d %d %d]\n",v1.ui64[0],v2.ui64[0],v3.ui64[0]);
+	if (v3.ui64[0] == 5)	/* 5 is the expected low quadword: success */
+		return 0;
+	printf("Correct: %d result: %llu\n", 5, (unsigned long long)v3.ui64[0]);
+	return -1;
 }
 #else
-void pclmul(){
+int pclmul(){
 	printf("PCMUL is not supported.");
+	return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/rdrand.c b/client/virt/deps/test_cpu_flags/rdrand.c
index f9d1b76..8a6cb58 100644
--- a/client/virt/deps/test_cpu_flags/rdrand.c
+++ b/client/virt/deps/test_cpu_flags/rdrand.c
@@ -8,7 +8,7 @@
 #include "tests.h"
 
 #ifdef __RDRND__
-void rdrand()
+int rdrand()
 {
 	int val, num=1;
 	while (num--) {
@@ -19,9 +19,11 @@ void rdrand()
 		__asm volatile("movl %%eax,%0" : "=m"(val));
 		printf("Random is %d\n",val);
 	}
+	return 0;
 }
 #else
-void rdrand(){
+int rdrand(){
 	printf("RDRAND is not supported.");
+	return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/sse3.c b/client/virt/deps/test_cpu_flags/sse3.c
index 18d2643..fd38821 100644
--- a/client/virt/deps/test_cpu_flags/sse3.c
+++ b/client/virt/deps/test_cpu_flags/sse3.c
@@ -9,7 +9,7 @@
 #include "tests.h"
 
 #ifdef __SSE3__
-void sse3(){
+int sse3(){
 	__ma128f v1;
 	__ma128f v2;
 	for (int i = 4;i >= 0; i--){
@@ -18,11 +18,17 @@ void sse3(){
 	}
 	__ma128f vo;
 	vo.f = _mm_addsub_ps(v1.f,v2.f);
-	printf("[%f]\n", vo.f32[3]);
+	if (fabsf(vo.f32[3] - (v1.f32[3]+v2.f32[3])) < FLT_EPSILON){ /* fabsf: integer abs() would truncate the difference */
+		return 0;
+	}else{
+		printf("Correct: %f result: %f\n",v1.f32[3]+v2.f32[3], vo.f32[3]);
+		return -1;
+	}
 }
 #else
-void sse3(){
+int sse3(){
 	printf("SSE3 is not supported.");
+	return 0;
 }
 #endif
 
diff --git a/client/virt/deps/test_cpu_flags/sse4.c b/client/virt/deps/test_cpu_flags/sse4.c
index f9b60fb..0f0a5fb 100644
--- a/client/virt/deps/test_cpu_flags/sse4.c
+++ b/client/virt/deps/test_cpu_flags/sse4.c
@@ -8,21 +8,36 @@
 #include "tests.h"
 
 #if (defined __SSE4_1__ || defined __SSE4_2__)
-void sse4(){
+int sse4(){
 	__ma128i v1;
 	__ma128i v2;
-	for (int i = 16;i >= 0; i--){
+	for (int i = 15;i >= 0; i--){
 		v1.ui8[i] = i;
 		v2.ui8[i] = 16-i;
 	}
 	__ma128i v3;
 	v3.i = _mm_max_epi8(v1.i,v2.i);
-	for (int i = 15;i >= 0; i--){
-		printf("max[%d]\n",v3.ui8[i]);
+	int ret = 0;
+	for (int i = 0;i < 16; i++){
+		if (v1.ui8[i] < v2.ui8[i]){
+			if (v3.ui8[i] != v2.ui8[i])
+				ret = 1;
+		}else{
+			if (v3.ui8[i] != v1.ui8[i])
+				ret = 1;
+		}
+	}
+	if (ret){
+		printf("Wrong result:\n");
+		for (int i = 15;i >= 0; i--){
+			printf("max[%d]\n",v3.ui8[i]);
+		}
 	}
+	return ret;
 }
 #else
-void sse4(){
+int sse4(){
 	printf("SSE4 is not supported.");
+	return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/sse4a.c b/client/virt/deps/test_cpu_flags/sse4a.c
new file mode 100644
index 0000000..a5fbcd9
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/sse4a.c
@@ -0,0 +1,37 @@
+/*
+ * sse4a.c
+ *
+ *  Created on: Nov 29, 2011
+ *      Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __SSE4A__
+
+/* _mm_stream_sd check: only the low double is stored. Returns 0 if ok, else -1. */
+int sse4a(){
+	__ma128f v;
+	double d[2] = { -1., -2. };
+	v.d64[0] = 0.;
+	v.d64[1] = 1.;
+	_mm_stream_sd(&d[0], v.d);
+	/* d[0] must receive the low lane; d[1] must keep its initial value. */
+	const double expected[2] = { v.d64[0], -2. };
+	for (int i = 0;i < 2; i++){
+		if (d[i] != expected[i]){
+			printf("Wrong result:\n");
+			for (int j = 0;j < 2; j++){
+				printf("Correct: %f result: %f\n", expected[j], d[j]);
+			}
+			return -1;
+		}
+	}
+	return 0;
+}
+
+#endif
+#ifndef __SSE4A__
+int sse4a(){
+	printf("SSE4A is not supported.");
+	return 0;
+}
+#endif
diff --git a/client/virt/deps/test_cpu_flags/ssse3.c b/client/virt/deps/test_cpu_flags/ssse3.c
index 8372f43..6604764 100644
--- a/client/virt/deps/test_cpu_flags/ssse3.c
+++ b/client/virt/deps/test_cpu_flags/ssse3.c
@@ -8,17 +8,23 @@
 #include "tests.h"
 
 #ifdef __SSSE3__
-void ssse3(){
+int ssse3(){
 	__ma128i v1;
 	for (int i = 16;i >= 0; i--){
-		v1.ui8[i] = -i;
+		v1.i8[i] = -i;
 	}
 	__ma128i vo;
 	vo.i = _mm_abs_epi8(v1.i);
-	printf("[%d]\n", vo.ui8[4]);
+	if (abs(v1.i8[4]) == vo.i8[4]){
+		return 0;
+	}else{
+		printf("Correct: %d result: %d\n", abs(v1.i8[4]), vo.i8[4]);
+		return -1;
+	}
 }
 #else
-void ssse3(){
+int ssse3(){
 	printf("SSSE3 is not supported.");
+	return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/stress.c b/client/virt/deps/test_cpu_flags/stress.c
index cad505b..5b0a9ed 100644
--- a/client/virt/deps/test_cpu_flags/stress.c
+++ b/client/virt/deps/test_cpu_flags/stress.c
@@ -63,6 +63,12 @@ void stress(inst in) {
 			pclmul();
 		if (in.rdrand)
 			rdrand();
+		if (in.fma4)
+			fma4();
+		if (in.xop)
+			xop();
+		if (in.sse4a)
+			sse4a();
 	}
 
 	int r = rand()%size;
diff --git a/client/virt/deps/test_cpu_flags/tests.h b/client/virt/deps/test_cpu_flags/tests.h
index a009923..b581864 100644
--- a/client/virt/deps/test_cpu_flags/tests.h
+++ b/client/virt/deps/test_cpu_flags/tests.h
@@ -10,19 +10,26 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <immintrin.h>
+//#include <immintrin.h>
+#include <x86intrin.h>
 #include <stdint.h>
 #include <omp.h>
+#include <float.h>
+#include <math.h>
+
 
 typedef struct{
 	int  num_threads;
 	char sse3;
 	char ssse3;
 	char sse4;
+	char sse4a;
 	char avx;
 	char aes;
 	char pclmul;
 	char rdrand;
+	char fma4;
+	char xop;
 } inst;
 
 typedef uint16_t auint16_t __attribute__ ((aligned(16)));
@@ -30,7 +37,10 @@ typedef uint16_t auint16_t __attribute__ ((aligned(16)));
 typedef union __attribute__ ((aligned(16))){
 	__m128i i;
 	uint64_t ui64[2];
+	uint32_t ui32[4];
+	uint16_t ui16[8];
 	uint8_t ui8[16];
+	int8_t i8[16];
 } __ma128i;
 
 typedef union __attribute__ ((aligned(32))){
@@ -40,14 +50,26 @@ typedef union __attribute__ ((aligned(32))){
 	double d64[2];
 } __ma128f;
 
-void aes();
-void pclmul();
-void rdrand();
+#ifdef __AVX__
+typedef union __attribute__ ((aligned(32))){
+	__m256 f;
+	__m256d d;
+	float f32[8];
+	double d64[4];
+} __ma256;
+#endif
+
 
-void avx();
-void sse4();
-void sse3();
-void ssse3();
+int aes();
+int pclmul();
+int rdrand();
+int avx();
+int sse4();
+int sse4a();
+int sse3();
+int ssse3();
+int fma4();
+int xop();
 void stress(inst in);
 
 
diff --git a/client/virt/deps/test_cpu_flags/xop.c b/client/virt/deps/test_cpu_flags/xop.c
new file mode 100644
index 0000000..ef01dde
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/xop.c
@@ -0,0 +1,48 @@
+/*
+ * xop.c
+ *
+ *  Created on: Nov 29, 2011
+ *      Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __XOP__
+
+/* Exercise _mm_cmov_si128 and _mm_haddd_epi8 and print the results; always returns 0. */
+int xop(){
+	__ma128i a, b, selector, d;
+	a.ui64[1] = 0xccccccccccccccccll;
+	a.ui64[0] = 0x8888888888888888ll;
+	b.ui64[1] = 0x3333333333333333ll;
+	b.ui64[0] = 0x7777777777777777ll;
+	selector.ui64[1] = 0xfedcba9876543210ll;
+	selector.ui64[0] = 0x0123456789abcdefll;
+	d.i = _mm_cmov_si128(a.i, b.i, selector.i);
+	printf("a:        %016llx %016llx\n",
+			  (unsigned long long)a.ui64[1], (unsigned long long)a.ui64[0]);
+	printf("b:        %016llx %016llx\n",
+			  (unsigned long long)b.ui64[1], (unsigned long long)b.ui64[0]);
+	printf("selector  %016llx %016llx\n",
+			  (unsigned long long)selector.ui64[1], (unsigned long long)selector.ui64[0]);
+	printf("result:   %016llx %016llx\n",
+			  (unsigned long long)d.ui64[1], (unsigned long long)d.ui64[0]);
+
+	for (int i = 0; i < 4; i++) {
+		a.ui8[i] = -128;
+		a.ui8[i+4] = i-128;
+		a.ui8[i+8] = 10*i;
+		a.ui8[i+12] = 127;
+	}
+	d.i = _mm_haddd_epi8(a.i);
+	for (int i = 0; i < 4; i++) printf(" %d", d.ui32[i]);
+	printf("\n");
+	return 0;
+}
+
+#endif
+#ifndef __XOP__
+int xop(){
+	printf("XOP is not supported.");
+	return 0;
+}
+#endif
-- 
1.7.7.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux