Re: memcpy test: results from adding sse and avx tests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 3/6/18 9:45 AM, Jens Axboe wrote:
> On 3/6/18 9:43 AM, Sitsofe Wheeler wrote:
>> On 6 March 2018 at 16:22, Jens Axboe <axboe@xxxxxxxxx> wrote:
>>>
>>> +##########################################
>>> +# sse probe
>>> +sse="no"
>>> +cat > $TMPC << EOF
>>> +#include <xmmintrin.h>
>>> +#include <immintrin.h>
>>> +int main(int argc, char **argv)
>>> +{
>>> +  __m128 val;
>>> +  float const *src = NULL;
>>> +  float *dst = NULL;
>>> +  val = _mm_load_ps(src);
>>> +  _mm_store_ps(dst, val);
>>> +  return 0;
>>> +}
>>> +EOF
>>> +if compile_prog "-msse" "" "sse"; then
>>> +  sse="yes"
>>> +fi
>>> +print_config "SSE (compiler)" "$sse"
>>
>> According to https://stackoverflow.com/questions/28939652/how-to-detect-sse-sse2-avx-avx2-avx-512-avx-128-fma-kcvi-availability-at-compile/28939692#28939692
>> in gcc and clang you can just check for the appropriate define (e.g.
>> __SSE2__ ) if you're only interested in what's available at compile
>> time...
> 
> I never really trust that, since there are always cases where that
> isn't true. A configure test with the meat of the code and types
> is always going to be 100% reliable.

BTW, another (and probably more problematic) issue is that the
compiler may well support sse/avx/avx512 while the machine the
binary is run on does not. If we enable an instruction set based
only on the compiler probe, we could see illegal instructions
outside of just the memcpy test. That would be a concern, since it
would prevent fio from running at all.

Because of that, I think we have to make it opt-in, something like the below.

diff --git a/Makefile b/Makefile
index c25b4222e437..26301e2dbf71 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,16 @@ LIBS	+= -lm $(EXTLIBS)
 PROGS	= fio
 SCRIPTS = $(addprefix $(SRCDIR)/,tools/fio_generate_plots tools/plot/fio2gnuplot tools/genfio tools/fiologparser.py tools/hist/fiologparser_hist.py tools/fio_jsonplus_clat2csv)
 
+ifdef CONFIG_HAVE_SSE
+CFLAGS	+= -msse
+endif
+ifdef CONFIG_HAVE_AVX
+CFLAGS	+= -mavx
+endif
+ifdef CONFIG_HAVE_AVX512
+CFLAGS	+= -mavx512f
+endif
+
 ifndef CONFIG_FIO_NO_OPT
   CFLAGS += -O3 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
 endif
diff --git a/configure b/configure
index 2e8eb180ef50..c82331a8160b 100755
--- a/configure
+++ b/configure
@@ -145,6 +145,8 @@ devdax="no"
 pmem="no"
 disable_lex=""
 disable_pmem="no"
+enable_sse="no"
+enable_avx="no"
 prefix=/usr/local
 
 # parse options
@@ -195,6 +197,12 @@ for opt do
   ;;
   --enable-cuda) enable_cuda="yes"
   ;;
+  --enable-sse) enable_sse="yes"
+  ;;
+  --enable-avx) enable_avx="yes"
+  ;;
+  --enable-avx512) enable_avx512="yes"
+  ;;
   --help)
     show_help="yes"
     ;;
@@ -224,6 +232,9 @@ if test "$show_help" = "yes" ; then
   echo "--disable-shm           Disable SHM support"
   echo "--disable-optimizations Don't enable compiler optimizations"
   echo "--enable-cuda           Enable GPUDirect RDMA support"
+  echo "--enable-sse            Enable SSE"
+  echo "--enable-avx            Enable AVX"
+  echo "--enable-avx512         Enable AVX512"
   exit $exit_val
 fi
 
@@ -2131,6 +2142,69 @@ if compile_prog "" "" "mkdir(a, b)"; then
 fi
 print_config "mkdir(a, b)" "$mkdir_two"
 
+##########################################
+# sse probe
+sse="no"
+cat > $TMPC << EOF
+#include <xmmintrin.h>
+#include <immintrin.h>
+int main(int argc, char **argv)
+{
+  __m128 val;
+  float const *src = NULL;
+  float *dst = NULL;
+  val = _mm_load_ps(src);
+  _mm_store_ps(dst, val);
+  return 0;
+}
+EOF
+if test "$enable_sse" = "yes" && compile_prog "-msse" "" "sse"; then
+  sse="yes"
+fi
+print_config "SSE (compiler)" "$sse"
+
+##########################################
+# avx probe
+avx="no"
+cat > $TMPC << EOF
+#include <xmmintrin.h>
+#include <immintrin.h>
+int main(int argc, char **argv)
+{
+  __m256 val;
+  float const *src = NULL;
+  float *dst = NULL;
+  val = _mm256_load_ps(src);
+  _mm256_store_ps(dst, val);
+  return 0;
+}
+EOF
+if test "$enable_avx" = "yes" && compile_prog "-mavx" "" "avx"; then
+  avx="yes"
+fi
+print_config "AVX (compiler)" "$avx"
+
+##########################################
+# avx512 probe
+avx512="no"
+cat > $TMPC << EOF
+#include <xmmintrin.h>
+#include <immintrin.h>
+int main(int argc, char **argv)
+{
+  __m512 val;
+  float const *src = NULL;
+  float *dst = NULL;
+  val = _mm512_load_ps(src);
+  _mm512_store_ps(dst, val);
+  return 0;
+}
+EOF
+if test "$enable_avx512" = "yes" && compile_prog "-mavx512f" "" "avx512"; then
+  avx512="yes"
+fi
+print_config "AVX512 (compiler)" "$avx512"
+
 #############################################################################
 
 if test "$wordsize" = "64" ; then
@@ -2377,6 +2451,15 @@ fi
 if test "$mkdir_two" = "yes" ; then
   output_sym "CONFIG_HAVE_MKDIR_TWO"
 fi
+if test "$sse" = "yes" ; then
+  output_sym "CONFIG_HAVE_SSE"
+fi
+if test "$avx" = "yes" ; then
+  output_sym "CONFIG_HAVE_AVX"
+fi
+if test "$avx512" = "yes" ; then
+  output_sym "CONFIG_HAVE_AVX512"
+fi
 
 echo "LIBS+=$LIBS" >> $config_host_mak
 echo "GFIO_LIBS+=$GFIO_LIBS" >> $config_host_mak
diff --git a/lib/memcpy.c b/lib/memcpy.c
index 00e65aa7d50a..bb3e579baf9d 100644
--- a/lib/memcpy.c
+++ b/lib/memcpy.c
@@ -2,9 +2,15 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if defined(CONFIG_HAVE_SSE) || defined(CONFIG_HAVE_AVX) || defined(CONFIG_HAVE_AVX512)
+#include <xmmintrin.h>
+#include <immintrin.h>
+#endif
+
 #include "memcpy.h"
 #include "rand.h"
 #include "../fio_time.h"
+#include "../lib/memalign.h"
 #include "../gettime.h"
 #include "../fio.h"
 
@@ -80,6 +86,15 @@ enum {
 	T_MEMMOVE	= 1U << 1,
 	T_SIMPLE	= 1U << 2,
 	T_HYBRID	= 1U << 3,
+#if defined(CONFIG_HAVE_SSE)
+	T_SSE		= 1U << 4,
+#endif
+#if defined(CONFIG_HAVE_AVX)
+	T_AVX		= 1U << 5,
+#endif
+#if defined(CONFIG_HAVE_AVX512)
+	T_AVX512	= 1U << 6,
+#endif
 };
 
 #define do_test(test, fn)	do {					\
@@ -122,6 +137,66 @@ static void simple_memcpy(void *dst, void const *src, size_t len)
 		*d++ = *s++;
 }
 
+#if defined(CONFIG_HAVE_SSE)
+static void sse_memcpy(void *dst, void const *src, size_t len)
+{
+	__m128 val;
+	float *d = dst;
+	float const *s = src;
+	int i;
+
+	if (len < sizeof(__m128))
+		return;
+
+	for (i = 0; i < len; i += sizeof(__m128)) {
+		val = _mm_load_ps(s);
+		_mm_store_ps(d, val);
+		d += sizeof(__m128) / sizeof(float);
+		s += sizeof(__m128) / sizeof(float);
+	}
+}
+#endif
+
+#if defined(CONFIG_HAVE_AVX)
+static void avx_memcpy(void *dst, void const *src, size_t len)
+{
+	__m256 val;
+	float *d = dst;
+	float const *s = src;
+	int i;
+
+	if (len < sizeof(__m256))
+		return;
+
+	for (i = 0; i < len; i += sizeof(__m256)) {
+		val = _mm256_load_ps(s);
+		_mm256_store_ps(d, val);
+		d += sizeof(__m256) / sizeof(float);
+		s += sizeof(__m256) / sizeof(float);
+	}
+}
+#endif
+
+#if defined(CONFIG_HAVE_AVX512)
+static void avx512_memcpy(void *dst, void const *src, size_t len)
+{
+	__m512 val;
+	float *d = dst;
+	float const *s = src;
+	int i;
+
+	if (len < sizeof(__m512))
+		return;
+
+	for (i = 0; i < len; i += sizeof(__m512)) {
+		val = _mm512_load_ps(s);
+		_mm512_store_ps(d, val);
+		d += sizeof(__m512) / sizeof(float);
+		s += sizeof(__m512) / sizeof(float);
+	}
+}
+#endif
+
 static void t_simple(struct memcpy_test *test)
 {
 	do_test(test, simple_memcpy);
@@ -135,6 +210,27 @@ static void t_hybrid(struct memcpy_test *test)
 		do_test(test, memcpy);
 }
 
+#if defined(CONFIG_HAVE_SSE)
+static void t_sse(struct memcpy_test *test)
+{
+	do_test(test, sse_memcpy);
+}
+#endif
+
+#if defined(CONFIG_HAVE_AVX)
+static void t_avx(struct memcpy_test *test)
+{
+	do_test(test, avx_memcpy);
+}
+#endif
+
+#if defined(CONFIG_HAVE_AVX512)
+static void t_avx512(struct memcpy_test *test)
+{
+	do_test(test, avx512_memcpy);
+}
+#endif
+
 static struct memcpy_type t[] = {
 	{
 		.name = "memcpy",
@@ -156,6 +252,27 @@ static struct memcpy_type t[] = {
 		.mask = T_HYBRID,
 		.fn = t_hybrid,
 	},
+#if defined(CONFIG_HAVE_SSE)
+	{
+		.name = "sse",
+		.mask = T_SSE,
+		.fn = t_sse,
+	},
+#endif
+#if defined(CONFIG_HAVE_AVX)
+	{
+		.name = "avx",
+		.mask = T_AVX,
+		.fn = t_avx,
+	},
+#endif
+#if defined(CONFIG_HAVE_AVX512)
+	{
+		.name = "avx512",
+		.mask = T_AVX512,
+		.fn = t_avx512,
+	},
+#endif
 	{
 		.name = NULL,
 	},
@@ -200,8 +317,8 @@ static int setup_tests(void)
 	void *src, *dst;
 	int i;
 
-	src = malloc(BUF_SIZE);
-	dst = malloc(BUF_SIZE);
+	src = fio_memalign(64, BUF_SIZE);
+	dst = fio_memalign(64, BUF_SIZE);
 	if (!src || !dst) {
 		free(src);
 		free(dst);
@@ -222,8 +339,8 @@ static int setup_tests(void)
 
 static void free_tests(void)
 {
-	free(tests[0].src);
-	free(tests[0].dst);
+	fio_memfree(tests[0].src, BUF_SIZE);
+	fio_memfree(tests[0].dst, BUF_SIZE);
 }
 
 int fio_memcpy_test(const char *type)

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux