On Wed, 10 Mar 2021 at 08:29, Eric Biggers <ebiggers@xxxxxxxxxx> wrote:
>
> From: Eric Biggers <ebiggers@xxxxxxxxxx>
>
> The new ARM BLAKE2s code doesn't work correctly (fails the self-tests)
> in big endian kernel builds because it doesn't swap the endianness of
> the message words when loading them. Fix this.
>
> Fixes: 5172d322d34c ("crypto: arm/blake2s - add ARM scalar optimized BLAKE2s")
> Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx>

Acked-by: Ard Biesheuvel <ardb@xxxxxxxxxx>

> ---
>  arch/arm/crypto/blake2s-core.S | 21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
>
> diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S
> index bed897e9a181a..86345751bbf3a 100644
> --- a/arch/arm/crypto/blake2s-core.S
> +++ b/arch/arm/crypto/blake2s-core.S
> @@ -8,6 +8,7 @@
>   */
>
>  #include <linux/linkage.h>
> +#include <asm/assembler.h>
>
>  // Registers used to hold message words temporarily. There aren't
>  // enough ARM registers to hold the whole message block, so we have to
> @@ -38,6 +39,23 @@
>  #endif
>  .endm
>
> +.macro _le32_bswap a, tmp
> +#ifdef __ARMEB__
> +	rev_l	\a, \tmp
> +#endif
> +.endm
> +
> +.macro _le32_bswap_8x a, b, c, d, e, f, g, h, tmp
> +	_le32_bswap	\a, \tmp
> +	_le32_bswap	\b, \tmp
> +	_le32_bswap	\c, \tmp
> +	_le32_bswap	\d, \tmp
> +	_le32_bswap	\e, \tmp
> +	_le32_bswap	\f, \tmp
> +	_le32_bswap	\g, \tmp
> +	_le32_bswap	\h, \tmp
> +.endm
> +
>  // Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals.
>  // (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two
>  // columns/diagonals. s0-s1 are the word offsets to the message words the first
> @@ -180,8 +198,10 @@ ENTRY(blake2s_compress_arch)
>  	tst	r1, #3
>  	bne	.Lcopy_block_misaligned
>  	ldmia	r1!, {r2-r9}
> +	_le32_bswap_8x	r2, r3, r4, r5, r6, r7, r8, r9, r14
>  	stmia	r12!, {r2-r9}
>  	ldmia	r1!, {r2-r9}
> +	_le32_bswap_8x	r2, r3, r4, r5, r6, r7, r8, r9, r14
>  	stmia	r12, {r2-r9}
>  .Lcopy_block_done:
>  	str	r1, [sp, #68]	// Update message pointer
> @@ -268,6 +288,7 @@ ENTRY(blake2s_compress_arch)
>  1:
>  #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  	ldr	r3, [r1], #4
> +	_le32_bswap	r3, r4
>  #else
>  	ldrb	r3, [r1, #0]
>  	ldrb	r4, [r1, #1]
> --
> 2.30.1
>