[PATCH RFC 3/3] crypto: arm64/aegis128 - implement plain NEON version

Provide a version of the core AES transform for the aegis128 SIMD
code that does not rely on the special AES instructions, but uses
plain NEON instructions instead. This allows the SIMD version of
the aegis128 driver to be used on arm64 systems that do not
implement those instructions (which are not mandatory in the
architecture), such as the Raspberry Pi 3.

Cc: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
---
 crypto/Makefile              |  5 ++
 crypto/aegis128-neon-inner.c | 53 ++++++++++++++++++++
 crypto/aegis128-neon.c       | 16 +++++-
 3 files changed, 73 insertions(+), 1 deletion(-)
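
For reference only, and not part of the patch itself: the table-based AES
round fallback added below maps onto ACLE NEON intrinsics roughly as in the
following sketch. The function name aegis_aes_round_tbl is made up for
illustration; crypto_aes_sbox is the 256-byte AES forward S-box referenced by
crypto_aegis128_init_neon() below. The actual patch pins the S-box and the
two permutation vectors in v14-v31 (hence the -ffixed-q* flags) and uses
inline asm so they are loaded once per kernel_neon_begin() section, whereas
this sketch reloads them on every call.

	/* Illustrative sketch only; the in-tree code uses fixed-register inline asm */
	#include <arm_neon.h>
	#include <stdint.h>

	/* 256-byte forward S-box, as loaded by crypto_aegis128_init_neon() */
	extern const uint8_t crypto_aes_sbox[256];

	static uint8x16_t aegis_aes_round_tbl(uint8x16_t w)
	{
		/* ShiftRows expressed as a byte permutation of the state */
		static const uint8_t shift_rows[16] = {
			0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
			0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
		};
		/* rotate each 32-bit column by a further 8 bits */
		static const uint8_t ror32by8[16] = {
			0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
			0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
		};
		uint8x16x4_t s0 = { { vld1q_u8(crypto_aes_sbox +   0),
				      vld1q_u8(crypto_aes_sbox +  16),
				      vld1q_u8(crypto_aes_sbox +  32),
				      vld1q_u8(crypto_aes_sbox +  48) } };
		uint8x16x4_t s1 = { { vld1q_u8(crypto_aes_sbox +  64),
				      vld1q_u8(crypto_aes_sbox +  80),
				      vld1q_u8(crypto_aes_sbox +  96),
				      vld1q_u8(crypto_aes_sbox + 112) } };
		uint8x16x4_t s2 = { { vld1q_u8(crypto_aes_sbox + 128),
				      vld1q_u8(crypto_aes_sbox + 144),
				      vld1q_u8(crypto_aes_sbox + 160),
				      vld1q_u8(crypto_aes_sbox + 176) } };
		uint8x16x4_t s3 = { { vld1q_u8(crypto_aes_sbox + 192),
				      vld1q_u8(crypto_aes_sbox + 208),
				      vld1q_u8(crypto_aes_sbox + 224),
				      vld1q_u8(crypto_aes_sbox + 240) } };
		uint8x16_t v;

		/* ShiftRows */
		w = vqtbl1q_u8(w, vld1q_u8(shift_rows));

		/*
		 * SubBytes: look up the 256-byte S-box 64 bytes at a time;
		 * tbl returns 0 for out-of-range indices, tbx leaves the
		 * destination byte untouched.
		 */
		v = vqtbl4q_u8(s0, w);
		w = vsubq_u8(w, vdupq_n_u8(0x40));
		v = vqtbx4q_u8(v, s1, w);
		w = vsubq_u8(w, vdupq_n_u8(0x40));
		v = vqtbx4q_u8(v, s2, w);
		w = vsubq_u8(w, vdupq_n_u8(0x40));
		v = vqtbx4q_u8(v, s3, w);

		/* MixColumns: 2*v in GF(2^8), reduced by 0x1b ... */
		w = veorq_u8(vshlq_n_u8(v, 1),
			     vandq_u8(vreinterpretq_u8_s8(
					vshrq_n_s8(vreinterpretq_s8_u8(v), 7)),
				      vdupq_n_u8(0x1b)));
		/* ... xor v rotated by 16 bits within each 32-bit column ... */
		w = veorq_u8(w, vreinterpretq_u8_u16(
					vrev32q_u16(vreinterpretq_u16_u8(v))));
		/* ... xor (v ^ w) rotated by 8 bits within each 32-bit column */
		w = veorq_u8(w, vqtbl1q_u8(veorq_u8(v, w), vld1q_u8(ror32by8)));

		return w;
	}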

diff --git a/crypto/Makefile b/crypto/Makefile
index 99a9fa9087d1..c3760c7616ac 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -99,6 +99,11 @@ aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
 endif
 ifeq ($(ARCH),arm64)
 CFLAGS_aegis128-neon-inner.o += -ffreestanding -mcpu=generic+crypto
+CFLAGS_aegis128-neon-inner.o += -ffixed-q14 -ffixed-q15
+CFLAGS_aegis128-neon-inner.o += -ffixed-q16 -ffixed-q17 -ffixed-q18 -ffixed-q19
+CFLAGS_aegis128-neon-inner.o += -ffixed-q20 -ffixed-q21 -ffixed-q22 -ffixed-q23
+CFLAGS_aegis128-neon-inner.o += -ffixed-q24 -ffixed-q25 -ffixed-q26 -ffixed-q27
+CFLAGS_aegis128-neon-inner.o += -ffixed-q28 -ffixed-q29 -ffixed-q30 -ffixed-q31
 CFLAGS_REMOVE_aegis128-neon-inner.o += -mgeneral-regs-only
 aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
 endif
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index 6aca2f425b6d..7aa4cef3c2de 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -17,6 +17,8 @@
 
 #include <stddef.h>
 
+extern int aegis128_have_aes_insn;
+
 void *memcpy(void *dest, const void *src, size_t n);
 void *memset(void *s, int c, size_t n);
 
@@ -49,6 +51,32 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
 {
 	uint8x16_t z = {};
 
+#ifdef CONFIG_ARM64
+	if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
+		uint8x16_t v;
+
+		// shift rows
+		asm("tbl %0.16b, {%0.16b}, v14.16b" : "+w"(w));
+
+		// sub bytes
+		asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
+
+		// mix columns
+		w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
+		w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
+		asm("tbl %0.16b, {%1.16b}, v15.16b" : "=w"(v) : "w"(v ^ w));
+		w ^= v;
+
+		return w;
+	}
+#endif
+
 	/*
 	 * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
 	 * to force the compiler to issue the aese/aesmc instructions in pairs.
@@ -149,3 +177,28 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 
 	aegis128_save_state_neon(st, state);
 }
+
+#ifdef CONFIG_ARM64
+void crypto_aegis128_init_neon(void)
+{
+	u64 tmp;
+
+	asm volatile(
+	    "adrp		%0, crypto_aes_sbox		\n\t"
+	    "add		%0, %0, :lo12:crypto_aes_sbox	\n\t"
+	    "mov		v14.16b, %1.16b			\n\t"
+	    "mov		v15.16b, %2.16b			\n\t"
+	    "ld1		{v16.16b-v19.16b}, [%0], #64	\n\t"
+	    "ld1		{v20.16b-v23.16b}, [%0], #64	\n\t"
+	    "ld1		{v24.16b-v27.16b}, [%0], #64	\n\t"
+	    "ld1		{v28.16b-v31.16b}, [%0]		\n\t"
+	    : "=&r"(tmp)
+	    : "w"((uint8x16_t){ // shift rows permutation vector
+			0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
+			0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb, }),
+	      "w"((uint8x16_t){ // ror32 permutation vector
+			0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
+			0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,	})
+	);
+}
+#endif
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
index c1c0a1686f67..72f9d48e4963 100644
--- a/crypto/aegis128-neon.c
+++ b/crypto/aegis128-neon.c
@@ -14,14 +14,24 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
 void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 					unsigned int size);
 
+void crypto_aegis128_init_neon(void);
+
+int aegis128_have_aes_insn __ro_after_init;
+
 bool crypto_aegis128_have_simd(void)
 {
-	return cpu_have_feature(cpu_feature(AES));
+	if (cpu_have_feature(cpu_feature(AES))) {
+		aegis128_have_aes_insn = 1;
+		return true;
+	}
+	return IS_ENABLED(CONFIG_ARM64);
 }
 
 void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
 {
 	kernel_neon_begin();
+	if (IS_ENABLED(CONFIG_ARM64) && !aegis128_have_aes_insn)
+		crypto_aegis128_init_neon();
 	crypto_aegis128_update_neon(state, msg);
 	kernel_neon_end();
 }
@@ -30,6 +40,8 @@ void crypto_aegis128_encrypt_chunk_simd(union aegis_block *state, u8 *dst,
 					const u8 *src, unsigned int size)
 {
 	kernel_neon_begin();
+	if (IS_ENABLED(CONFIG_ARM64) && !aegis128_have_aes_insn)
+		crypto_aegis128_init_neon();
 	crypto_aegis128_encrypt_chunk_neon(state, dst, src, size);
 	kernel_neon_end();
 }
@@ -38,6 +50,8 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
 					const u8 *src, unsigned int size)
 {
 	kernel_neon_begin();
+	if (IS_ENABLED(CONFIG_ARM64) && !aegis128_have_aes_insn)
+		crypto_aegis128_init_neon();
 	crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
 	kernel_neon_end();
 }
-- 
2.17.1



