Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit 1684f7fd9047c7405264f462f76e1135c563ec33:

  Add missing .help string for io_size option (2017-01-03 10:10:58 -0700)

are available in the git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 847d544cce05157ec36f50b8214b26aff83aef01:

  Style cleanups for arm crc32c hw support (2017-01-04 19:44:35 -0700)

----------------------------------------------------------------
Jens Axboe (1):
      Style cleanups for arm crc32c hw support

wei xiao (1):
      Add arm64 hardware assisted crc32c support

 HOWTO               |   5 +++
 Makefile            |   4 +-
 arch/arch-aarch64.h |   4 ++
 configure           |  23 +++++++++++
 crc/crc32c-arm64.c  | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 crc/crc32c.h        |  14 +++++++
 crc/test.c          |   1 +
 lib/bloom.c         |   1 +
 options.c           |   4 ++
 verify.c            |   2 +
 verify.h            |   1 +
 11 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 crc/crc32c-arm64.c

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO
index 4354e46..4cc733f 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1514,6 +1514,11 @@ verify=str	If writing to a file, fio can verify the file contents
 				back to regular software crc32c, if not
 				supported by the system.
 
+			crc32c-arm64 Use hardware assisted crc32c calculation
+				provided on CRC-enabled 64-bit ARM processors.
+				Falls back to regular software crc32c, if not
+				supported by the system.
+
 			crc32	Use a crc32 sum of the data area and store
 				it in the header of each block.
 
diff --git a/Makefile b/Makefile
index 4c64168..ad02d93 100644
--- a/Makefile
+++ b/Makefile
@@ -234,10 +234,10 @@ endif
 T_DEDUPE_OBJS = t/dedupe.o
 T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
 		lib/memalign.o lib/bloom.o t/debug.o crc/xxhash.o t/arch.o \
-		crc/murmur3.o crc/crc32c.o crc/crc32c-intel.o crc/fnv.o
+		crc/murmur3.o crc/crc32c.o crc/crc32c-intel.o crc/crc32c-arm64.o crc/fnv.o
 T_DEDUPE_PROGS = t/fio-dedupe
 
-T_VS_OBJS = t/verify-state.o t/log.o crc/crc32c.o crc/crc32c-intel.o t/debug.o
+T_VS_OBJS = t/verify-state.o t/log.o crc/crc32c.o crc/crc32c-intel.o crc/crc32c-arm64.o t/debug.o
 T_VS_PROGS = t/fio-verify-state
 
 T_PIPE_ASYNC_OBJS = t/read-to-pipe-async.o
diff --git a/arch/arch-aarch64.h b/arch/arch-aarch64.h
index 2a86cc5..0912a86 100644
--- a/arch/arch-aarch64.h
+++ b/arch/arch-aarch64.h
@@ -27,4 +27,8 @@ static inline int arch_ffz(unsigned long bitmask)
 
 #define ARCH_HAVE_FFZ
 
+#ifdef ARCH_HAVE_CRC_CRYPTO
+#define ARCH_HAVE_ARM64_CRC_CRYPTO
+#endif
+
 #endif
diff --git a/configure b/configure
index fc15782..7de88f8 100755
--- a/configure
+++ b/configure
@@ -342,6 +342,8 @@ elif check_define __s390__ ; then
   fi
 elif check_define __arm__ ; then
   cpu="arm"
+elif check_define __aarch64__ ; then
+  cpu="aarch64"
 elif check_define __hppa__ ; then
   cpu="hppa"
 else
@@ -362,6 +364,9 @@ case "$cpu" in
   armv*b|armv*l|arm)
     cpu="arm"
   ;;
+  aarch64)
+    cpu="arm64"
+  ;;
   hppa|parisc|parisc64)
     cpu="hppa"
   ;;
@@ -1780,6 +1785,24 @@ if compile_prog "" "" "bool"; then
 fi
 echo "bool                          $have_bool"
 
+##########################################
+# check march=armv8-a+crc+crypto
+march_armv8_a_crc_crypto="no"
+if test "$cpu" = "arm64" ; then
+  cat > $TMPC <<EOF
+int main(void)
+{
+  return 0;
+}
+EOF
+  if compile_prog "-march=armv8-a+crc+crypto" "" ""; then
+    march_armv8_a_crc_crypto="yes"
+    CFLAGS="$CFLAGS -march=armv8-a+crc+crypto -DARCH_HAVE_CRC_CRYPTO"
+  fi
+fi
+echo "march_armv8_a_crc_crypto      $march_armv8_a_crc_crypto"
+
+
 #############################################################################
 
 if test "$wordsize" = "64" ; then
diff --git a/crc/crc32c-arm64.c b/crc/crc32c-arm64.c
new file mode 100644
index 0000000..c3f42c7
--- /dev/null
+++ b/crc/crc32c-arm64.c
@@ -0,0 +1,115 @@
+#include "crc32c.h"
+
+#define CRC32C3X8(ITR) /* one 8-byte crc32c step on each of three streams spaced 42 words apart; uses data, crc0, crc1, crc2 from the expansion site */ \
+	crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\
+	crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\
+	crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR)));
+
+#define CRC32C7X3X8(ITR) do { /* seven CRC32C3X8 steps covering word offsets ITR*7 .. ITR*7+6 of each stream */ \
+	CRC32C3X8((ITR)*7+0) \
+	CRC32C3X8((ITR)*7+1) \
+	CRC32C3X8((ITR)*7+2) \
+	CRC32C3X8((ITR)*7+3) \
+	CRC32C3X8((ITR)*7+4) \
+	CRC32C3X8((ITR)*7+5) \
+	CRC32C3X8((ITR)*7+6) \
+	} while(0)
+
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32             (1 << 7)
+#endif /* HWCAP_CRC32 */
+
+int crc32c_arm64_available = 0;
+
+#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
+
+#include <sys/auxv.h>
+#include <arm_acle.h>
+#include <arm_neon.h>
+
+static int crc32c_probed;
+
+/*
+ * Calculate a reflected crc32c over data[0 .. length) with the crc32c intrinsics and a PMULL-based merge.
+ * The crc is done "by 3" on fixed 1024-byte blocks; leftover bytes are folded in 8/4/2/1 at a time. Starts from ~0 and returns the running value with no final inversion.
+ */
+uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
+{
+	signed long len = length;	/* signed: the loop conditions below rely on len going negative */
+	uint32_t crc = ~0;	/* initial crc value: all ones */
+	uint32_t crc0, crc1, crc2;	/* one running crc per interleaved stream */
+
+	/* Load two consts: K1 and K2 */
+	const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
+	uint64_t t0, t1;
+
+	while ((len -= 1024) >= 0) {	/* one full 1024-byte block per pass */
+		/* Do first 8 bytes here for better pipelining */
+		crc0 = __crc32cd(crc, *(const uint64_t *)data);	/* NOTE(review): data is read as uint64_t with no alignment check - confirm this is acceptable for all callers */
+		crc1 = 0;	/* streams 1 and 2 start from zero and are merged after the block */
+		crc2 = 0;
+		data += sizeof(uint64_t);
+
+		/* Process block inline
+		   Process crc0 last to avoid dependency with above */
+		CRC32C7X3X8(0);	/* 6 x 7 = 42 words per stream, 3 streams = 1008 bytes */
+		CRC32C7X3X8(1);
+		CRC32C7X3X8(2);
+		CRC32C7X3X8(3);
+		CRC32C7X3X8(4);
+		CRC32C7X3X8(5);
+
+		data += 42*3*sizeof(uint64_t);	/* step over the 1008 bytes consumed by the macros above */
+
+		/* Merge crc0 and crc1 into crc2
+		   crc1 multiply by K2
+		   crc0 multiply by K1 */
+
+		t1 = (uint64_t)vmull_p64(crc1, k2);
+		t0 = (uint64_t)vmull_p64(crc0, k1);
+		crc = __crc32cd(crc2, *(const uint64_t *)data);	/* last 8 bytes of the block are folded into stream 2 */
+		crc1 = __crc32cd(0, t1);
+		crc ^= crc1;
+		crc0 = __crc32cd(0, t0);
+		crc ^= crc0;
+
+		data += sizeof(uint64_t);	/* 8 + 1008 + 8 = 1024 bytes consumed in total */
+	}
+
+	if (!(len += 1024))	/* undo the failed subtraction: len is now the number of bytes left (0..1023) */
+		return crc;
+
+	while ((len -= sizeof(uint64_t)) >= 0) {	/* 8 bytes at a time; exits with len in -8..-1 */
+                crc = __crc32cd(crc, *(const uint64_t *)data);
+                data += sizeof(uint64_t);
+        }
+
+        /* The following is more efficient than the straight loop */
+        if (len & sizeof(uint32_t)) {	/* len is (bytes left - 8) here, so its low three bits still give the tail size */
+                crc = __crc32cw(crc, *(const uint32_t *)data);
+                data += sizeof(uint32_t);
+        }
+        if (len & sizeof(uint16_t)) {
+                crc = __crc32ch(crc, *(const uint16_t *)data);
+                data += sizeof(uint16_t);
+        }
+        if (len & sizeof(uint8_t)) {
+                crc = __crc32cb(crc, *(const uint8_t *)data);
+        }
+
+	return crc;
+}
+
+void crc32c_arm64_probe(void)	/* set crc32c_arm64_available when AT_HWCAP reports the HWCAP_CRC32 bit */
+{
+	unsigned long hwcap;
+
+	if (!crc32c_probed) {	/* only query the auxiliary vector on the first call */
+		hwcap = getauxval(AT_HWCAP);
+		if (hwcap & HWCAP_CRC32)
+			crc32c_arm64_available = 1;
+		crc32c_probed = 1;	/* remember that probing is done, whether or not the bit was set */
+	}
+}
+
+#endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */
diff --git a/crc/crc32c.h b/crc/crc32c.h
index 11bcf9c..5d66407 100644
--- a/crc/crc32c.h
+++ b/crc/crc32c.h
@@ -21,8 +21,19 @@
 #include "../arch/arch.h"
 
 extern uint32_t crc32c_sw(unsigned char const *, unsigned long);
+extern int crc32c_arm64_available;
 extern int crc32c_intel_available;
 
+#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
+extern uint32_t crc32c_arm64(unsigned char const *, unsigned long);
+extern void crc32c_arm64_probe(void);
+#else
+#define crc32c_arm64 crc32c_sw
+static inline void crc32c_arm64_probe(void)
+{
+}
+#endif
+
 #ifdef ARCH_HAVE_SSE4_2
 extern uint32_t crc32c_intel(unsigned char const *, unsigned long);
 extern void crc32c_intel_probe(void);
@@ -35,6 +46,9 @@ static inline void crc32c_intel_probe(void)
 
 static inline uint32_t fio_crc32c(unsigned char const *buf, unsigned long len)
 {
+	if (crc32c_arm64_available)
+		return crc32c_arm64(buf, len);
+
 	if (crc32c_intel_available)
 		return crc32c_intel(buf, len);
 
diff --git a/crc/test.c b/crc/test.c
index 300000d..78f19ac 100644
--- a/crc/test.c
+++ b/crc/test.c
@@ -291,6 +291,7 @@ int fio_crctest(const char *type)
 	int i, first = 1;
 	void *buf;
 
+	crc32c_arm64_probe();
 	crc32c_intel_probe();
 
 	if (!type)
diff --git a/lib/bloom.c b/lib/bloom.c
index fa38db9..7a9ebaa 100644
--- a/lib/bloom.c
+++ b/lib/bloom.c
@@ -65,6 +65,7 @@ struct bloom *bloom_new(uint64_t entries)
 	struct bloom *b;
 	size_t no_uints;
 
+	crc32c_arm64_probe();
 	crc32c_intel_probe();
 
 	b = malloc(sizeof(*b));
diff --git a/options.c b/options.c
index 1ca16e8..5886c50 100644
--- a/options.c
+++ b/options.c
@@ -2647,6 +2647,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 			    .oval = VERIFY_CRC32C,
 			    .help = "Use crc32c checksums for verification (hw assisted, if available)",
 			  },
+			  { .ival = "crc32c-arm64",
+			    .oval = VERIFY_CRC32C,
+			    .help = "Use crc32c checksums for verification (hw assisted, if available)",
+			  },
 			  { .ival = "crc32c",
 			    .oval = VERIFY_CRC32C,
 			    .help = "Use crc32c checksums for verification (hw assisted, if available)",
diff --git a/verify.c b/verify.c
index 790ab31..8733feb 100644
--- a/verify.c
+++ b/verify.c
@@ -1210,7 +1210,9 @@ nothing:
 void fio_verify_init(struct thread_data *td)
 {
 	if (td->o.verify == VERIFY_CRC32C_INTEL ||
+	    td->o.verify == VERIFY_CRC32C_ARM64 ||
 	    td->o.verify == VERIFY_CRC32C) {
+		crc32c_arm64_probe();
 		crc32c_intel_probe();
 	}
 }
diff --git a/verify.h b/verify.h
index deb161e..8d40ff6 100644
--- a/verify.h
+++ b/verify.h
@@ -15,6 +15,7 @@ enum {
 	VERIFY_CRC64,			/* crc64 sum data blocks */
 	VERIFY_CRC32,			/* crc32 sum data blocks */
 	VERIFY_CRC32C,			/* crc32c sum data blocks */
+	VERIFY_CRC32C_ARM64,		/* crc32c sum data blocks with hw */
 	VERIFY_CRC32C_INTEL,		/* crc32c sum data blocks with hw */
 	VERIFY_CRC16,			/* crc16 sum data blocks */
 	VERIFY_CRC7,			/* crc7 sum data blocks */
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux