Re: memcpy test: results from adding sse and avx tests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 01/18/2018 10:47 AM, Jens Axboe wrote:

Adding the memcpy for avx/sse to the test case might be interesting
though, just to be able to compare performance with the builtin
memcpy/memmove on a given system.

I've attached the patch. I think it should work on all Intel systems newer than 2012. It causes a segfault on my Atom E3826 (Bay Trail) because that CPU doesn't support AVX.

I've tested it using Clang 3.8, GCC 4.8 and 7.x on openSUSE, and Clang 4.0 on FreeBSD 11.1.


--
Rebecca

From f9fa5d6fc83d5d924ac2cf9532d51a3b1d81c9ff Mon Sep 17 00:00:00 2001
From: Rebecca Cran <rebecca@xxxxxxxxxxxx>
Date: Thu, 18 Jan 2018 12:07:11 -0700
Subject: [PATCH] Add SSE and AVX tests to lib/memcpy.c and add -msse and -mavx
 to compiler flags

---
 Makefile     |  2 +-
 lib/memcpy.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 3ce60646..4e32f269 100644
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,7 @@ endif
 DEBUGFLAGS = -DFIO_INC_DEBUG
 CPPFLAGS= -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DFIO_INTERNAL $(DEBUGFLAGS)
 OPTFLAGS= -g -ffast-math
-CFLAGS	= -std=gnu99 -Wwrite-strings -Wall -Wdeclaration-after-statement $(OPTFLAGS) $(EXTFLAGS) $(BUILD_CFLAGS) -I. -I$(SRCDIR)
+CFLAGS	= -msse -mavx -std=gnu99 -Wwrite-strings -Wall -Wdeclaration-after-statement $(OPTFLAGS) $(EXTFLAGS) $(BUILD_CFLAGS) -I. -I$(SRCDIR)
 LIBS	+= -lm $(EXTLIBS)
 PROGS	= fio
 SCRIPTS = $(addprefix $(SRCDIR)/,tools/fio_generate_plots tools/plot/fio2gnuplot tools/genfio tools/fiologparser.py tools/hist/fiologparser_hist.py tools/fio_jsonplus_clat2csv)
diff --git a/lib/memcpy.c b/lib/memcpy.c
index 00e65aa7..1fb22dad 100644
--- a/lib/memcpy.c
+++ b/lib/memcpy.c
@@ -1,6 +1,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <xmmintrin.h>
+#include <immintrin.h>
 
 #include "memcpy.h"
 #include "rand.h"
@@ -80,6 +82,8 @@ enum {
 	T_MEMMOVE	= 1U << 1,
 	T_SIMPLE	= 1U << 2,
 	T_HYBRID	= 1U << 3,
+	T_SSE		= 1U << 4,
+	T_AVX		= 1U << 5,
 };
 
 #define do_test(test, fn)	do {					\
@@ -122,6 +126,43 @@ static void simple_memcpy(void *dst, void const *src, size_t len)
 		*d++ = *s++;
 }
 
+/* Copy len bytes in 16-byte SSE chunks; dst and src must be 16-byte aligned. */
+static void sse_memcpy(void *dst, void const *src, size_t len)
+{
+	float *d = dst;
+	float const *s = src;
+	size_t i;
+
+	/* size_t index; stop before a partial vector so we never run past len. */
+	for (i = 0; len - i >= sizeof(__m128); i += sizeof(__m128)) {
+		_mm_store_ps(d, _mm_load_ps(s));
+		d += sizeof(__m128) / sizeof(float);
+		s += sizeof(__m128) / sizeof(float);
+	}
+
+	/* Copy the tail (also covers len < 16, which was silently skipped). */
+	if (i < len)
+		memcpy(d, s, len - i);
+}
+
+/* Copy len bytes in 32-byte AVX chunks; dst and src must be 32-byte aligned. */
+static void avx_memcpy(void *dst, void const *src, size_t len)
+{
+	float *d = dst;
+	float const *s = src;
+	size_t i;
+
+	/* size_t index; stop before a partial vector so we never run past len. */
+	for (i = 0; len - i >= sizeof(__m256); i += sizeof(__m256)) {
+		_mm256_store_ps(d, _mm256_load_ps(s));
+		d += sizeof(__m256) / sizeof(float);
+		s += sizeof(__m256) / sizeof(float);
+	}
+
+	if (i < len)	/* tail, including len < 32 which used to be skipped */
+		memcpy(d, s, len - i);
+}
+
 static void t_simple(struct memcpy_test *test)
 {
 	do_test(test, simple_memcpy);
@@ -135,6 +176,16 @@ static void t_hybrid(struct memcpy_test *test)
 		do_test(test, memcpy);
 }
 
+static void t_sse(struct memcpy_test *test)
+{
+	do_test(test, sse_memcpy);	/* expand the do_test macro with sse_memcpy as its fn argument */
+}
+
+static void t_avx(struct memcpy_test *test)
+{
+	do_test(test, avx_memcpy);	/* expand the do_test macro with avx_memcpy as its fn argument */
+}
+
 static struct memcpy_type t[] = {
 	{
 		.name = "memcpy",
@@ -157,6 +208,16 @@ static struct memcpy_type t[] = {
 		.fn = t_hybrid,
 	},
 	{
+		.name = "sse",
+		.mask = T_SSE,
+		.fn = t_sse,
+	},
+	{
+		.name = "avx",
+		.mask = T_AVX,
+		.fn = t_avx,
+	},
+	{
 		.name = NULL,
 	},
 };
@@ -200,8 +261,8 @@ static int setup_tests(void)
 	void *src, *dst;
 	int i;
 
-	src = malloc(BUF_SIZE);
-	dst = malloc(BUF_SIZE);
+	src = aligned_alloc(64, BUF_SIZE);
+	dst = aligned_alloc(64, BUF_SIZE);
 	if (!src || !dst) {
 		free(src);
 		free(dst);
-- 
2.13.6


[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux