[PATCH v2 09/12] x86/crypto: Fix RBP usage in sha256-avx2-asm.S

Josh Poimboeuf <jpoimboe@xxxxxxxxxx> · Mon, 18 Sep 2017 14:42:08 -0500

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

There's no need to use RBP as a temporary register for the TBL value,
because it always stores the same value: the address of the K256 table.
Instead just reference the address of K256 directly.

Reported-by: Eric Biggers <ebiggers@xxxxxxxxxx>
Reported-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Tested-by: Eric Biggers <ebiggers@xxxxxxxxxx>
Acked-by: Eric Biggers <ebiggers@xxxxxxxxxx>
Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
 arch/x86/crypto/sha256-avx2-asm.S | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..1420db15dcdd 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -98,8 +98,6 @@ d	= %r8d
 e       = %edx	# clobbers NUM_BLKS
 y3	= %esi	# clobbers INP
 
-
-TBL	= %rbp
 SRND	= CTX	# SRND is same register as CTX
 
 a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE	= _RSP      + _RSP_SIZE
 ENTRY(sha256_transform_rorx)
 .align 32
 	pushq	%rbx
-	pushq	%rbp
 	pushq	%r12
 	pushq	%r13
 	pushq	%r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
 	mov	CTX, _CTX(%rsp)
 
 loop0:
-	lea     K256(%rip), TBL
-
 	## Load first 16 dwords from two blocks
 	VMOVDQ	0*32(INP),XTMP0
 	VMOVDQ	1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
 
 .align 16
 loop1:
-	vpaddd	0*32(TBL, SRND), X0, XFER
+	vpaddd	K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 0*32
 
-	vpaddd	1*32(TBL, SRND), X0, XFER
+	vpaddd	K256+1*32(SRND), X0, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 1*32
 
-	vpaddd	2*32(TBL, SRND), X0, XFER
+	vpaddd	K256+2*32(SRND), X0, XFER
 	vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 2*32
 
-	vpaddd	3*32(TBL, SRND), X0, XFER
+	vpaddd	K256+3*32(SRND), X0, XFER
 	vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 3*32
 
@@ -619,10 +614,11 @@ loop1:
 
 loop2:
 	## Do last 16 rounds with no scheduling
-	vpaddd	0*32(TBL, SRND), X0, XFER
+	vpaddd	K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 0*32
-	vpaddd	1*32(TBL, SRND), X1, XFER
+
+	vpaddd	K256+1*32(SRND), X1, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 1*32
 	add	$2*32, SRND
@@ -676,9 +672,6 @@ loop3:
 	ja	done_hash
 
 do_last_block:
-	#### do last block
-	lea	K256(%rip), TBL
-
 	VMOVDQ	0*16(INP),XWORD0
 	VMOVDQ	1*16(INP),XWORD1
 	VMOVDQ	2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
 	popq	%r14
 	popq	%r13
 	popq	%r12
-	popq	%rbp
 	popq	%rbx
 	ret
 ENDPROC(sha256_transform_rorx)
-- 
2.13.5