Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R12 instead of RBP for the TBL register.  Since R12 is also used as
another temporary register (T1), it gets clobbered in each round of
computation.  So the table address needs to be freshly reloaded into R12
each time it's used.

Reported-by: Eric Biggers <ebiggers@xxxxxxxxxx>
Reported-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
 arch/x86/crypto/sha256-avx2-asm.S | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..cdd647231fa9 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -99,7 +99,7 @@
 e	= %edx	# clobbers NUM_BLKS
 y3	= %esi	# clobbers INP

-TBL	= %rbp
+TBL	= %r12	# clobbered by T1
 SRND	= CTX	# SRND is same register as CTX

 a = %eax
@@ -531,7 +531,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
 ENTRY(sha256_transform_rorx)
 .align 32
 	pushq	%rbx
-	pushq	%rbp
 	pushq	%r12
 	pushq	%r13
 	pushq	%r14
@@ -568,8 +567,6 @@ ENTRY(sha256_transform_rorx)
 	mov	CTX, _CTX(%rsp)

 loop0:
-	lea	K256(%rip), TBL
-
 	## Load first 16 dwords from two blocks
 	VMOVDQ	0*32(INP),XTMP0
 	VMOVDQ	1*32(INP),XTMP1
@@ -597,18 +594,22 @@ last_block_enter:

 .align 16
 loop1:
+	lea	K256(%rip), TBL
 	vpaddd	0*32(TBL, SRND), X0, XFER
 	vmovdqa	XFER, 0*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 0*32

+	lea	K256(%rip), TBL
 	vpaddd	1*32(TBL, SRND), X0, XFER
 	vmovdqa	XFER, 1*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 1*32

+	lea	K256(%rip), TBL
 	vpaddd	2*32(TBL, SRND), X0, XFER
 	vmovdqa	XFER, 2*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 2*32

+	lea	K256(%rip), TBL
 	vpaddd	3*32(TBL, SRND), X0, XFER
 	vmovdqa	XFER, 3*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 3*32
@@ -619,9 +620,12 @@ loop1:

 loop2:
 	## Do last 16 rounds with no scheduling
+	lea	K256(%rip), TBL
 	vpaddd	0*32(TBL, SRND), X0, XFER
 	vmovdqa	XFER, 0*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 0*32
+
+	lea	K256(%rip), TBL
 	vpaddd	1*32(TBL, SRND), X1, XFER
 	vmovdqa	XFER, 1*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 1*32
@@ -676,9 +680,6 @@ loop3:
 	ja	done_hash

 do_last_block:
-	#### do last block
-	lea	K256(%rip), TBL
-
 	VMOVDQ	0*16(INP),XWORD0
 	VMOVDQ	1*16(INP),XWORD1
 	VMOVDQ	2*16(INP),XWORD2
@@ -718,7 +719,6 @@ done_hash:
 	popq	%r14
 	popq	%r13
 	popq	%r12
-	popq	%rbp
 	popq	%rbx
 	ret
 ENDPROC(sha256_transform_rorx)
-- 
2.13.5
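
For reference, a minimal sketch of the reload pattern the patch introduces,
using the aliases and macros already defined in sha256-avx2-asm.S (TBL, SRND,
X0, XFER, _XFER, K256, FOUR_ROUNDS_AND_SCHED); the comments describing the
clobber are an interpretation based on the commit message, not text from the
file:

	lea	K256(%rip), TBL			# rematerialize &K256 into %r12
	vpaddd	0*32(TBL, SRND), X0, XFER	# use TBL while it is still live
	vmovdqa	XFER, 0*32+_XFER(%rsp, SRND)
	FOUR_ROUNDS_AND_SCHED	_XFER + 0*32	# round macro uses T1 (%r12),
						# so TBL is dead afterwards

Since TBL always holds the same constant address, each reload is a single
RIP-relative lea with no memory access; an alternative would be spilling the
pointer to a stack slot, which would instead cost a load before each use.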