Re: memcpy test: results from adding sse and avx tests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 3/5/18 10:22 PM, Jens Axboe wrote:
> On 1/18/18 1:36 PM, Rebecca Cran wrote:
>> On 01/18/2018 10:47 AM, Jens Axboe wrote:
>>>
>>> Adding the memcpy for avx/sse to the test case might be interesting
>>> though, just to be able to compare performances with builtin
>>> memcpy/memmove on a given system.
>>
>> I've attached the patch. I think it should work on all Intel systems 
>> newer than 2012. It causes a segfault on my Atom E3826 (Baytrail) 
>> because it doesn't support AVX.
>>
>> I've tested it using Clang 3.8, GCC 4.8 and 7.x on openSUSE, and Clang 
>> 4.0 on FreeBSD 11.1.
> 
> I'm worried that this will break some platforms. A few comments that
> might help rectify that:
> 
> 1) Use fio_memalign() instead of relying on a C11 function
> 2) Add a configure check for the avx and sse, separately.
>    This includes -msse for the compiler, includes for the
>    xmm/imm intrinsics, etc.
> 
> I don't care if it segfaults if avx/sse isn't there, but I don't
> want to risk failing a build on some rarely tested platform, just
> to have this as part of the memcpy test. The risk/reward just
> isn't there then.

Something like the below. It fixes up the two points above, and also
corrects some style issues, such as 'int i' being declared within the
for-loop.


diff --git a/Makefile b/Makefile
index c25b4222e437..1f5cb429f2ef 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,13 @@ LIBS	+= -lm $(EXTLIBS)
 PROGS	= fio
 SCRIPTS = $(addprefix $(SRCDIR)/,tools/fio_generate_plots tools/plot/fio2gnuplot tools/genfio tools/fiologparser.py tools/hist/fiologparser_hist.py tools/fio_jsonplus_clat2csv)
 
+ifdef CONFIG_HAVE_SSE
+CFLAGS	+= -msse
+endif
+ifdef CONFIG_HAVE_AVX
+CFLAGS	+= -mavx
+endif
+
 ifndef CONFIG_FIO_NO_OPT
   CFLAGS += -O3 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
 endif
diff --git a/configure b/configure
index 2e8eb180ef50..61ed3d6d1f1b 100755
--- a/configure
+++ b/configure
@@ -2131,6 +2131,48 @@ if compile_prog "" "" "mkdir(a, b)"; then
 fi
 print_config "mkdir(a, b)" "$mkdir_two"
 
+##########################################
+# sse probe
+sse="no"
+cat > $TMPC << EOF
+#include <xmmintrin.h>
+#include <immintrin.h>
+int main(int argc, char **argv)
+{
+  __m128 val;
+  float const *src = NULL;
+  float *dst = NULL;
+  val = _mm_load_ps(src);
+  _mm_store_ps(dst, val);
+  return 0;
+}
+EOF
+if compile_prog "-msse" "" "sse"; then
+  sse="yes"
+fi
+print_config "SSE (compiler)" "$sse"
+
+##########################################
+# avx probe
+avx="no"
+cat > $TMPC << EOF
+#include <xmmintrin.h>
+#include <immintrin.h>
+int main(int argc, char **argv)
+{
+  __m256 val;
+  float const *src = NULL;
+  float *dst = NULL;
+  val = _mm256_load_ps(src);
+  _mm256_store_ps(dst, val);
+  return 0;
+}
+EOF
+if compile_prog "-mavx" "" "avx"; then
+  avx="yes"
+fi
+print_config "AVX (compiler)" "$avx"
+
 #############################################################################
 
 if test "$wordsize" = "64" ; then
@@ -2377,6 +2419,12 @@ fi
 if test "$mkdir_two" = "yes" ; then
   output_sym "CONFIG_HAVE_MKDIR_TWO"
 fi
+if test "$sse" = "yes" ; then
+  output_sym "CONFIG_HAVE_SSE"
+fi
+if test "$avx" = "yes" ; then
+  output_sym "CONFIG_HAVE_AVX"
+fi
 
 echo "LIBS+=$LIBS" >> $config_host_mak
 echo "GFIO_LIBS+=$GFIO_LIBS" >> $config_host_mak
diff --git a/lib/memcpy.c b/lib/memcpy.c
index 00e65aa7d50a..b978a1197fd2 100644
--- a/lib/memcpy.c
+++ b/lib/memcpy.c
@@ -2,9 +2,15 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if defined(CONFIG_HAVE_SSE) || defined(CONFIG_HAVE_AVX)
+#include <xmmintrin.h>
+#include <immintrin.h>
+#endif
+
 #include "memcpy.h"
 #include "rand.h"
 #include "../fio_time.h"
+#include "../lib/memalign.h"
 #include "../gettime.h"
 #include "../fio.h"
 
@@ -80,6 +86,12 @@ enum {
 	T_MEMMOVE	= 1U << 1,
 	T_SIMPLE	= 1U << 2,
 	T_HYBRID	= 1U << 3,
+#if defined(CONFIG_HAVE_SSE)
+	T_SSE		= 1U << 4,
+#endif
+#if defined(CONFIG_HAVE_AVX)
+	T_AVX		= 1U << 5,
+#endif
 };
 
 #define do_test(test, fn)	do {					\
@@ -122,6 +134,46 @@ static void simple_memcpy(void *dst, void const *src, size_t len)
 		*d++ = *s++;
 }
 
+#if defined(CONFIG_HAVE_SSE)
+static void sse_memcpy(void *dst, void const *src, size_t len)
+{
+	__m128 val;
+	float *d = dst;
+	float const *s = src;
+	int i;
+
+	if (len < sizeof(__m128))
+		return;
+
+	for (i = 0; i < len; i += sizeof(__m128)) {
+		val = _mm_load_ps(s);
+		_mm_store_ps(d, val);
+		d += sizeof(__m128) / sizeof(float);
+		s += sizeof(__m128) / sizeof(float);
+	}
+}
+#endif
+
+#if defined(CONFIG_HAVE_AVX)
+static void avx_memcpy(void *dst, void const *src, size_t len)
+{
+	__m256 val;
+	float *d = dst;
+	float const *s = src;
+	int i;
+
+	if (len < sizeof(__m256))
+		return;
+
+	for (i = 0; i < len; i += sizeof(__m256)) {
+		val = _mm256_load_ps(s);
+		_mm256_store_ps(d, val);
+		d += sizeof(__m256) / sizeof(float);
+		s += sizeof(__m256) / sizeof(float);
+	}
+}
+#endif
+
 static void t_simple(struct memcpy_test *test)
 {
 	do_test(test, simple_memcpy);
@@ -135,6 +187,20 @@ static void t_hybrid(struct memcpy_test *test)
 		do_test(test, memcpy);
 }
 
+#if defined(CONFIG_HAVE_SSE)
+static void t_sse(struct memcpy_test *test)
+{
+	do_test(test, sse_memcpy);
+}
+#endif
+
+#if defined(CONFIG_HAVE_AVX)
+static void t_avx(struct memcpy_test *test)
+{
+	do_test(test, avx_memcpy);
+}
+#endif
+
 static struct memcpy_type t[] = {
 	{
 		.name = "memcpy",
@@ -156,6 +222,20 @@ static struct memcpy_type t[] = {
 		.mask = T_HYBRID,
 		.fn = t_hybrid,
 	},
+#if defined(CONFIG_HAVE_SSE)
+	{
+		.name = "sse",
+		.mask = T_SSE,
+		.fn = t_sse,
+	},
+#endif
+#if defined(CONFIG_HAVE_AVX)
+	{
+		.name = "avx",
+		.mask = T_AVX,
+		.fn = t_avx,
+	},
+#endif
 	{
 		.name = NULL,
 	},
@@ -200,8 +280,8 @@ static int setup_tests(void)
 	void *src, *dst;
 	int i;
 
-	src = malloc(BUF_SIZE);
-	dst = malloc(BUF_SIZE);
+	src = fio_memalign(64, BUF_SIZE);
+	dst = fio_memalign(64, BUF_SIZE);
 	if (!src || !dst) {
 		free(src);
 		free(dst);
@@ -222,8 +302,8 @@ static int setup_tests(void)
 
 static void free_tests(void)
 {
-	free(tests[0].src);
-	free(tests[0].dst);
+	fio_memfree(tests[0].src, BUF_SIZE);
+	fio_memfree(tests[0].dst, BUF_SIZE);
 }
 
 int fio_memcpy_test(const char *type)

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux