On Mon, 8 Apr 2024 at 16:18, Stefan Kanthak <stefan.kanthak@xxxxxxxx> wrote:
>
> Get rid of 16 byte constant, use shift instead of mask
>

This is missing a signoff.

> --- -/arch/x86/crypto/sha1_ni_asm.S
> +++ +/arch/x86/crypto/sha1_ni_asm.S
> @@ -104,9 +104,9 @@
>         add             DATA_PTR, NUM_BLKS      /* pointer to end of data */
>
>         /* load initial hash values */
> -       pinsrd          $3, 1*16(DIGEST_PTR), E0
> +       pinsrd          $0, 1*16(DIGEST_PTR), E0
>         movdqu          0*16(DIGEST_PTR), ABCD
> -       pand            UPPER_WORD_MASK(%rip), E0
> +       pslldq          $12, E0
>         pshufd          $0x1B, ABCD, ABCD
>

Wouldn't it be simpler and therefore better to simply zero register E0
before loading E into the top lane?

>         movdqa          PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
> @@ -297,8 +297,3 @@
>  .align 16
>  PSHUFFLE_BYTE_FLIP_MASK:
>         .octa 0x000102030405060708090a0b0c0d0e0f
> -
> -.section       .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
> -.align 16
> -UPPER_WORD_MASK:
> -       .octa 0xFFFFFFFF000000000000000000000000
>