Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the CAST5 input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx> --- crypto/cast5_generic.c | 23 ++++++++------------ 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 4095085d4e51..0257c14cefc2 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -13,7 +13,7 @@ */ -#include <asm/byteorder.h> +#include <asm/unaligned.h> #include <linux/init.h> #include <linux/crypto.h> #include <linux/module.h> @@ -302,8 +302,6 @@ static const u32 sb8[256] = { void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) { - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 l, r, t; u32 I; /* used by the Fx macros */ u32 *Km; @@ -315,8 +313,8 @@ void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.) */ - l = be32_to_cpu(src[0]); - r = be32_to_cpu(src[1]); + l = get_unaligned_be32(inbuf); + r = get_unaligned_be32(inbuf + 4); /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows: * Li = Ri-1; @@ -347,8 +345,8 @@ void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and * concatenate to form the ciphertext.) */ - dst[0] = cpu_to_be32(r); - dst[1] = cpu_to_be32(l); + put_unaligned_be32(r, outbuf); + put_unaligned_be32(l, outbuf + 4); } EXPORT_SYMBOL_GPL(__cast5_encrypt); @@ -359,8 +357,6 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) { - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 l, r, t; u32 I; u32 *Km; @@ -369,8 +365,8 @@ void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) Km = c->Km; Kr = c->Kr; - l = be32_to_cpu(src[0]); - r = be32_to_cpu(src[1]); + l = get_unaligned_be32(inbuf); + r = get_unaligned_be32(inbuf + 4); if (!(c->rr)) { t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); @@ -391,8 +387,8 @@ void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - dst[0] = cpu_to_be32(r); - dst[1] = cpu_to_be32(l); + put_unaligned_be32(r, outbuf); + put_unaligned_be32(l, outbuf + 4); } EXPORT_SYMBOL_GPL(__cast5_decrypt); @@ -513,7 +509,6 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAST5_BLOCK_SIZE, .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { -- 2.20.1