Align ARM's hw instruction based AES implementation with other versions
that keep the key schedule in native endianness. This will allow us to
merge the various implementations going forward.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
---
 arch/arm/crypto/aes-ce-core.S | 20 ++++++++++----------
 arch/arm/crypto/aes-ce-glue.c |  9 +++------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index bc53bcaa772e..3692b8735ef7 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -91,19 +91,19 @@
 
 	.macro		do_block, dround, fround
 	cmp		r3, #12			@ which key size?
-	vld1.8		{q10-q11}, [ip]!
+	vld1.32		{q10-q11}, [ip]!
 	\dround		q8, q9
-	vld1.8		{q12-q13}, [ip]!
+	vld1.32		{q12-q13}, [ip]!
 	\dround		q10, q11
-	vld1.8		{q10-q11}, [ip]!
+	vld1.32		{q10-q11}, [ip]!
 	\dround		q12, q13
-	vld1.8		{q12-q13}, [ip]!
+	vld1.32		{q12-q13}, [ip]!
 	\dround		q10, q11
 	blo		0f			@ AES-128: 10 rounds
-	vld1.8		{q10-q11}, [ip]!
+	vld1.32		{q10-q11}, [ip]!
 	\dround		q12, q13
 	beq		1f			@ AES-192: 12 rounds
-	vld1.8		{q12-q13}, [ip]
+	vld1.32		{q12-q13}, [ip]
 	\dround		q10, q11
 0:	\fround		q12, q13, q14
 	bx		lr
@@ -152,8 +152,8 @@ ENDPROC(aes_decrypt_3x)
 
 	.macro		prepare_key, rk, rounds
 	add		ip, \rk, \rounds, lsl #4
-	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
-	vld1.8		{q14}, [ip]		@ load last round key
+	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
+	vld1.32		{q14}, [ip]		@ load last round key
 	.endm
 
 /*
@@ -508,8 +508,8 @@ ENDPROC(ce_aes_sub)
  * operation on round key *src
  */
ENTRY(ce_aes_invert)
-	vld1.8		{q0}, [r1]
+	vld1.32		{q0}, [r1]
 	aesimc.8	q0, q0
-	vst1.8		{q0}, [r0]
+	vst1.32		{q0}, [r0]
 	bx		lr
ENDPROC(ce_aes_invert)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 04ba66903674..e6da3e30018b 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -10,6 +10,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
@@ -80,21 +80,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
 
-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
 	kernel_neon_begin();
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
 		rko[0] = rko[0] ^ rki[0] ^ rcon[i];
-#else
-		rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
-		rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
-- 
2.17.1
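
As an aside, the reason the CONFIG_CPU_BIG_ENDIAN special case can go
away: get_unaligned_le32() assembles each key word from explicit byte
positions, so the expanded schedule holds the same u32 values on little-
and big-endian kernels, and vld1.32 then loads those words using the
CPU's native 32-bit element order. Below is a minimal userspace sketch
of that load step; load_le32() is a local stand-in assumed to behave
like the kernel's get_unaligned_le32() helper, and the sketch is an
illustration, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's get_unaligned_le32() helper. */
static uint32_t load_le32(const uint8_t *p)
{
	/*
	 * Assemble the word from explicit byte positions: the result is
	 * independent of the host's endianness.
	 */
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
	/* First eight bytes of an example AES key. */
	const uint8_t key[8] = { 0x00, 0x01, 0x02, 0x03,
				 0x04, 0x05, 0x06, 0x07 };
	uint32_t w[2];
	int i;

	/* Mirrors the new expandkey prologue: one le32 load per word. */
	for (i = 0; i < 2; i++)
		w[i] = load_le32(key + i * sizeof(uint32_t));

	/* Prints 0x03020100 0x07060504 on LE and BE hosts alike. */
	printf("0x%08x 0x%08x\n", (unsigned)w[0], (unsigned)w[1]);
	return 0;
}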