The SIMD routine ported from x86 used to have a special code path for inputs < 16 bytes, which got lost somewhere along the way. Instead, the current glue code aligns the input pointer to permit the NEON routine to use special versions of the vld1 instructions that assume 16 byte alignment, but this could result in inputs of less than 16 bytes to be passed in. This not only fails the new extended tests that Eric has implemented, it also results in the code reading before the input pointer, which could potentially result in crashes when dealing with less than 16 bytes of input at the start of a page which is preceded by an unmapped page. So update the glue code to only invoke the NEON routine if the input is more than 16 bytes. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> --- arch/arm64/crypto/crct10dif-ce-glue.c | 25 +++++--------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c index b461d62023f2..567c24f3d224 100644 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -39,26 +39,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data, unsigned int length) { u16 *crc = shash_desc_ctx(desc); - unsigned int l; - if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) { - l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE - - ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)); - - *crc = crc_t10dif_generic(*crc, data, l); - - length -= l; - data += l; - } - - if (length > 0) { - if (may_use_simd()) { - kernel_neon_begin(); - *crc = crc_t10dif_pmull(*crc, data, length); - kernel_neon_end(); - } else { - *crc = crc_t10dif_generic(*crc, data, length); - } + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) { + kernel_neon_begin(); + *crc = crc_t10dif_pmull(*crc, data, length); + kernel_neon_end(); + } else { + *crc = crc_t10dif_generic(*crc, data, length); } return 0; -- 2.17.1