Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the CAST6 input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx> --- crypto/cast6_generic.c | 39 +++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index c77ff6c8a2b2..75346380aa0b 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -10,7 +10,7 @@ */ -#include <asm/byteorder.h> +#include <asm/unaligned.h> #include <linux/init.h> #include <linux/crypto.h> #include <linux/module.h> @@ -172,16 +172,14 @@ static inline void QBAR(u32 *block, const u8 *Kr, const u32 *Km) void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) { const struct cast6_ctx *c = ctx; - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 block[4]; const u32 *Km; const u8 *Kr; - block[0] = be32_to_cpu(src[0]); - block[1] = be32_to_cpu(src[1]); - block[2] = be32_to_cpu(src[2]); - block[3] = be32_to_cpu(src[3]); + block[0] = get_unaligned_be32(inbuf); + block[1] = get_unaligned_be32(inbuf + 4); + block[2] = get_unaligned_be32(inbuf + 8); + block[3] = get_unaligned_be32(inbuf + 12); Km = c->Km[0]; Kr = c->Kr[0]; Q(block, Kr, Km); Km = c->Km[1]; Kr = c->Kr[1]; Q(block, Kr, Km); @@ -196,10 +194,10 @@ void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) Km = c->Km[10]; Kr = c->Kr[10]; QBAR(block, Kr, Km); Km = c->Km[11]; Kr = c->Kr[11]; QBAR(block, Kr, Km); - dst[0] = cpu_to_be32(block[0]); - dst[1] = cpu_to_be32(block[1]); - dst[2] = cpu_to_be32(block[2]); - dst[3] = cpu_to_be32(block[3]); + put_unaligned_be32(block[0], outbuf); + put_unaligned_be32(block[1], outbuf + 4); + put_unaligned_be32(block[2], outbuf + 8); + put_unaligned_be32(block[3], outbuf + 12); } EXPORT_SYMBOL_GPL(__cast6_encrypt); @@ -211,16 +209,14 @@ static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) { const struct cast6_ctx *c = ctx; - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 block[4]; const u32 *Km; const u8 *Kr; - block[0] = be32_to_cpu(src[0]); - block[1] = be32_to_cpu(src[1]); - block[2] = be32_to_cpu(src[2]); - block[3] = be32_to_cpu(src[3]); + block[0] = get_unaligned_be32(inbuf); + block[1] = get_unaligned_be32(inbuf + 4); + block[2] = get_unaligned_be32(inbuf + 8); + block[3] = get_unaligned_be32(inbuf + 12); Km = c->Km[11]; Kr = c->Kr[11]; Q(block, Kr, Km); Km = c->Km[10]; Kr = c->Kr[10]; Q(block, Kr, Km); @@ -235,10 +231,10 @@ void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) Km = c->Km[1]; Kr = c->Kr[1]; QBAR(block, Kr, Km); Km = c->Km[0]; Kr = c->Kr[0]; QBAR(block, Kr, Km); - dst[0] = cpu_to_be32(block[0]); - dst[1] = cpu_to_be32(block[1]); - dst[2] = cpu_to_be32(block[2]); - dst[3] = cpu_to_be32(block[3]); + put_unaligned_be32(block[0], outbuf); + put_unaligned_be32(block[1], outbuf + 4); + put_unaligned_be32(block[2], outbuf + 8); + put_unaligned_be32(block[3], outbuf + 12); } EXPORT_SYMBOL_GPL(__cast6_decrypt); @@ -254,7 +250,6 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAST6_BLOCK_SIZE, .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { -- 2.20.1