[PATCH v2 4/4] crypto: x86/sha256-ni - simplify do_4rounds

Eric Biggers <ebiggers@xxxxxxxxxx> · Thu, 11 Apr 2024 09:23:59 -0700

From: Eric Biggers <ebiggers@xxxxxxxxxx>

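Instead of loading the message words into both MSG and \m0 and then
adding the round constants to MSG, load the message words into \m0 and
the round constants into MSG and then add \m0 to MSG.  This shortens the
source code slightly.  The generated instructions differ slightly (the
register-to-register movdqa copies between MSG and \m0 are replaced by a
movdqa load of the round constants, and paddd now takes \m0 rather than
a memory operand), but this doesn't affect binary code size and doesn't
seem to affect performance.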

Suggested-by: Stefan Kanthak <stefan.kanthak@xxxxxxxx>
Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx>
---
 arch/x86/crypto/sha256_ni_asm.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index ffc9f1c75c15..d515a55a3bc1 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -76,17 +76,15 @@
 #define ABEF_SAVE	%xmm9
 #define CDGH_SAVE	%xmm10
 
 .macro do_4rounds	i, m0, m1, m2, m3
 .if \i < 16
-	movdqu		\i*4(DATA_PTR), MSG
-	pshufb		SHUF_MASK, MSG
-	movdqa		MSG, \m0
-.else
-	movdqa		\m0, MSG
+	movdqu		\i*4(DATA_PTR), \m0
+	pshufb		SHUF_MASK, \m0
 .endif
-	paddd		(\i-32)*4(SHA256CONSTANTS), MSG
+	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
+	paddd		\m0, MSG
 	sha256rnds2	STATE0, STATE1
 .if \i >= 12 && \i < 60
 	movdqa		\m0, TMP
 	palignr		$4, \m3, TMP
 	paddd		TMP, \m1
-- 
2.44.0