On 10/16/2017 08:51 AM, Andrey Ryabinin wrote:
> struct sha256_ctx_mgr allocated in sha256_mb_mod_init() via kzalloc()
> and later passed in sha256_mb_flusher_mgr_flush_avx2() function where
> instructions vmovdqa used to access the struct. vmovdqa requires
> 16-bytes aligned argument, but nothing guarantees that struct
> sha256_ctx_mgr will have that alignment. Unaligned vmovdqa will
> generate GP fault.
>
> Fix this by replacing vmovdqa with vmovdqu which doesn't have alignment
> requirements.

Using vmovdqu will be a bit slower if the structure is unaligned.
However, flush is done on a non-performance-critical path, so I don't
expect this to be a performance issue.

Thanks.

Tim

Acked-by: Tim Chen

>
> Fixes: a377c6b1876e ("crypto: sha256-mb - submit/flush routines for AVX2")
> Reported-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> Signed-off-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx>
> ---
>  arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
> index 8fe6338bcc84..16c4ccb1f154 100644
> --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
> +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
> @@ -155,8 +155,8 @@ LABEL skip_ %I
>  .endr
>
>  	# Find min length
> -	vmovdqa _lens+0*16(state), %xmm0
> -	vmovdqa _lens+1*16(state), %xmm1
> +	vmovdqu _lens+0*16(state), %xmm0
> +	vmovdqu _lens+1*16(state), %xmm1
>
>  	vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
>  	vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
> @@ -176,8 +176,8 @@ LABEL skip_ %I
>  	vpsubd %xmm2, %xmm0, %xmm0
>  	vpsubd %xmm2, %xmm1, %xmm1
>
> -	vmovdqa %xmm0, _lens+0*16(state)
> -	vmovdqa %xmm1, _lens+1*16(state)
> +	vmovdqu %xmm0, _lens+0*16(state)
> +	vmovdqu %xmm1, _lens+1*16(state)
>
>  	# "state" and "args" are the same address, arg1
>  	# len is arg2
> @@ -234,8 +234,8 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
>  	jc .return_null
>
>  	# Find min length
> -	vmovdqa _lens(state), %xmm0
> -	vmovdqa _lens+1*16(state), %xmm1
> +	vmovdqu _lens(state), %xmm0
> +	vmovdqu _lens+1*16(state), %xmm1
>
>  	vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
>  	vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
>