On Fri, 26 Nov 2021 at 16:02, Arnd Bergmann <arnd@xxxxxxxxxx> wrote:
>
> From: Arnd Bergmann <arnd@xxxxxxxx>
>
> On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> because the ordinary load/store instructions (ldr, ldrh, ldrb) can
> tolerate any misalignment of the memory address. However, load/store
> double and load/store multiple instructions (ldrd, ldm) may still only
> be used on memory addresses that are 32-bit aligned, and so we have to
> use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we
> may end up with a severe performance hit due to alignment traps that
> require fixups by the kernel. Testing shows that this currently happens
> with clang-13 but not gcc-11. In theory, any compiler version can
> produce this bug or other problems, as we are dealing with undefined
> behavior in C99 even on architectures that support this in hardware,
> see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363.
>
> Fortunately, the get_unaligned() accessors do the right thing: when
> building for ARMv6 or later, the compiler will emit unaligned accesses
> using the ordinary load/store instructions (but avoid the ones that
> require 32-bit alignment). When building for older ARM, those accessors
> will emit the appropriate sequence of ldrb/mov/orr instructions. And on
> architectures that can truly tolerate any kind of misalignment, the
> get_unaligned() accessors resolve to the leXX_to_cpup accessors that
> operate on aligned addresses.
>
> Since the compiler will in fact emit ldrd or ldm instructions when
> building this code for ARM v6 or later, the solution is to use the
> unaligned accessors unconditionally on architectures where this is
> known to be fast. The _aligned version of the hash function is
> however still needed to get the best performance on architectures
> that cannot do any unaligned access in hardware.
>
> This new version avoids the undefined behavior and should produce
> the fastest hash on all architectures we support.
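
A side note for anyone who wants to see the accessor behaviour in
isolation: the defined-behaviour way to express an unaligned load in C
is a byte-wise memcpy(), which is essentially what the kernel's
get_unaligned() helpers boil down to. A minimal standalone sketch (my
own names, not kernel code; assumes a little-endian host):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Portable unaligned 64-bit load: on targets that tolerate misaligned
 * access the compiler turns the memcpy() into a single plain load,
 * and never into a trapping ldrd/ldm pair.
 */
static uint64_t load_unaligned_u64(const void *p)
{
        uint64_t v;

        memcpy(&v, p, sizeof(v)); /* no alignment assumption, no UB */
        return v;
}

int main(void)
{
        unsigned char buf[16] = { 0 };

        buf[1] = 0xab; /* the value starts at an odd address */
        printf("%llx\n", (unsigned long long)load_unaligned_u64(buf + 1));
        return 0;
}

Dereferencing (const uint64_t *)(buf + 1) directly would be the
undefined-behavior case the commit message refers to.
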
>
> Link: https://lore.kernel.org/linux-arm-kernel/20181008211554.5355-4-ard.biesheuvel@xxxxxxxxxx/
> Reported-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> Fixes: 2c956a60778c ("siphash: add cryptographically secure PRF")
> Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx>

Acked-by: Ard Biesheuvel <ardb@xxxxxxxxxx>

> ---
>  include/linux/siphash.h | 14 ++++----------
>  lib/siphash.c           | 12 ++++++------
>  2 files changed, 10 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/siphash.h b/include/linux/siphash.h
> index bf21591a9e5e..0cda61855d90 100644
> --- a/include/linux/siphash.h
> +++ b/include/linux/siphash.h
> @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key)
>  }
>
>  u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
> -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
> -#endif
>
>  u64 siphash_1u64(const u64 a, const siphash_key_t *key);
>  u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
> @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
>  static inline u64 siphash(const void *data, size_t len,
>                            const siphash_key_t *key)
>  {
> -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> -       if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
> +       if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
> +           !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
>                 return __siphash_unaligned(data, len, key);
> -#endif
>         return ___siphash_aligned(data, len, key);
>  }
>
> @@ -96,10 +93,8 @@ typedef struct {
>
>  u32 __hsiphash_aligned(const void *data, size_t len,
>                         const hsiphash_key_t *key);
> -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u32 __hsiphash_unaligned(const void *data, size_t len,
>                           const hsiphash_key_t *key);
> -#endif
>
>  u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
>  u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
> @@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
>  static inline u32 hsiphash(const void *data, size_t len,
>                             const hsiphash_key_t *key)
>  {
> -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> -       if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
> +       if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
> +           !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
>                 return __hsiphash_unaligned(data, len, key);
> -#endif
>         return ___hsiphash_aligned(data, len, key);
>  }
>
> diff --git a/lib/siphash.c b/lib/siphash.c
> index a90112ee72a1..72b9068ab57b 100644
> --- a/lib/siphash.c
> +++ b/lib/siphash.c
> @@ -49,6 +49,7 @@
>         SIPROUND; \
>         return (v0 ^ v1) ^ (v2 ^ v3);
>
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
>  {
>         const u8 *end = data + len - (len % sizeof(u64));
> @@ -80,8 +81,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
>         POSTAMBLE
>  }
>  EXPORT_SYMBOL(__siphash_aligned);
> +#endif
>
> -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
>  {
>         const u8 *end = data + len - (len % sizeof(u64));
> @@ -113,7 +114,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
>         POSTAMBLE
>  }
>  EXPORT_SYMBOL(__siphash_unaligned);
> -#endif
>
>  /**
>   * siphash_1u64 - compute 64-bit siphash PRF value of a u64
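
One detail worth spelling out, since it is what makes the #ifndef flip
in lib/siphash.c above safe: IS_ENABLED() expands to a constant 0 or 1,
so when the option is set the condition folds at compile time, the
aligned branch becomes dead code, and the inline wrapper never
references the aligned helper. A standalone sketch of that folding
(plain C; the macro and the hash_* names are stand-ins I made up, not
the kernel API):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define EFFICIENT_UNALIGNED_ACCESS 1 /* stand-in for IS_ENABLED(CONFIG_...) */

static uint64_t hash_unaligned(const void *data, size_t len)
{
        (void)data;
        return len;     /* dummy body standing in for __siphash_unaligned() */
}

static uint64_t hash_aligned(const void *data, size_t len)
{
        (void)data;
        return len + 1; /* dummy body standing in for ___siphash_aligned() */
}

static inline uint64_t hash(const void *data, size_t len)
{
        /*
         * Constant condition: with the macro set to 1 the compiler
         * removes the aligned branch as dead code, so hash_aligned()
         * need not exist in the final object at all.
         */
        if (EFFICIENT_UNALIGNED_ACCESS ||
            ((uintptr_t)data & (sizeof(uint64_t) - 1)))
                return hash_unaligned(data, len);
        return hash_aligned(data, len);
}

int main(void)
{
        char buf[16];

        /* Both calls take the unaligned path when the macro is 1. */
        printf("%llu %llu\n",
               (unsigned long long)hash(buf, 8),
               (unsigned long long)hash(buf + 1, 8));
        return 0;
}
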
> @@ -250,6 +250,7 @@ EXPORT_SYMBOL(siphash_3u32);
>         HSIPROUND; \
>         return (v0 ^ v1) ^ (v2 ^ v3);
>
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
>  {
>         const u8 *end = data + len - (len % sizeof(u64));
> @@ -280,8 +281,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
>         HPOSTAMBLE
>  }
>  EXPORT_SYMBOL(__hsiphash_aligned);
> +#endif
>
> -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u32 __hsiphash_unaligned(const void *data, size_t len,
>                           const hsiphash_key_t *key)
>  {
> @@ -313,7 +314,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
>         HPOSTAMBLE
>  }
>  EXPORT_SYMBOL(__hsiphash_unaligned);
> -#endif
>
>  /**
>   * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
> @@ -418,6 +418,7 @@ EXPORT_SYMBOL(hsiphash_4u32);
>         HSIPROUND; \
>         return v1 ^ v3;
>
> +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
>  {
>         const u8 *end = data + len - (len % sizeof(u32));
> @@ -438,8 +439,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
>         HPOSTAMBLE
>  }
>  EXPORT_SYMBOL(__hsiphash_aligned);
> +#endif
>
> -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>  u32 __hsiphash_unaligned(const void *data, size_t len,
>                           const hsiphash_key_t *key)
>  {
> @@ -461,7 +462,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
>         HPOSTAMBLE
>  }
>  EXPORT_SYMBOL(__hsiphash_unaligned);
> -#endif
>
>  /**
>   * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
> --
> 2.29.2
>
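
For whoever picks this up: an untested, written-from-memory sketch of a
self-test that would exercise both paths and catch an alignment-trap
regression like the one fixed here. siphash(), siphash_key_t and
__aligned() are the real kernel API; the function name, key value and
module boilerplate are mine:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/siphash.h>
#include <linux/string.h>

static int __init siphash_align_test_init(void)
{
	static const siphash_key_t key = {
		{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }
	};
	u8 src[64] __aligned(8);
	u8 buf[72] __aligned(8);
	u64 h_aligned, h_unaligned;

	memset(src, 0x5a, sizeof(src));
	memcpy(buf + 1, src, sizeof(src)); /* same bytes at an odd address */

	h_aligned = siphash(src, sizeof(src), &key);       /* aligned start */
	h_unaligned = siphash(buf + 1, sizeof(src), &key); /* unaligned start */

	/*
	 * Identical input must hash identically whichever path ran, and
	 * neither call may trigger an alignment trap along the way.
	 */
	return h_aligned == h_unaligned ? 0 : -EINVAL;
}
module_init(siphash_align_test_init);
MODULE_LICENSE("GPL");

With CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y both calls now go through
__siphash_unaligned(), so the comparison degenerates into a smoke test
of the unaligned path, which is exactly the point of the patch.
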