On Mon, Jan 24, 2022 at 11:29 AM Jason A. Donenfeld <Jason@xxxxxxxxx> wrote:
>
> blake2s_compress_generic is weakly aliased by blake2s_generic. The
> current harness for function selection uses a function pointer, which is
> ordinarily inlined and resolved at compile time. But when Clang's CFI is
> enabled, CFI still triggers when making an indirect call via a weak
> symbol. This seems like a bug in Clang's CFI, as though it's bucketing
> weak symbols and strong symbols differently. It also only seems to
> trigger when "full LTO" mode is used, rather than "thin LTO".
>
> [ 0.000000][ T0] Kernel panic - not syncing: CFI failure (target: blake2s_compress_generic+0x0/0x1444)
> [ 0.000000][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-mainline-06981-g076c855b846e #1
> [ 0.000000][ T0] Hardware name: MT6873 (DT)
> [ 0.000000][ T0] Call trace:
> [ 0.000000][ T0] dump_backtrace+0xfc/0x1dc
> [ 0.000000][ T0] dump_stack_lvl+0xa8/0x11c
> [ 0.000000][ T0] panic+0x194/0x464
> [ 0.000000][ T0] __cfi_check_fail+0x54/0x58
> [ 0.000000][ T0] __cfi_slowpath_diag+0x354/0x4b0
> [ 0.000000][ T0] blake2s_update+0x14c/0x178
> [ 0.000000][ T0] _extract_entropy+0xf4/0x29c
> [ 0.000000][ T0] crng_initialize_primary+0x24/0x94
> [ 0.000000][ T0] rand_initialize+0x2c/0x6c
> [ 0.000000][ T0] start_kernel+0x2f8/0x65c
> [ 0.000000][ T0] __primary_switched+0xc4/0x7be4
> [ 0.000000][ T0] Rebooting in 5 seconds..
>
> Nonetheless, the function pointer method isn't so terrific anyway, so
> this patch replaces it with a simple boolean, which also gets inlined
> away. This successfully works around the Clang bug.

Acked-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>

Thanks for the report. Once we have a fix in hand for LLVM, we can
revisit removing this and raising the required LLVM version for CFI.

>
> In general, I'm not too keen on all of the indirection involved here; it
> clearly does more harm than good. 
Hopefully the whole thing can get > cleaned up down the road when lib/crypto is overhauled more > comprehensively. But for now, we go with a simple bandaid. > > Fixes: 6048fdcc5f26 ("lib/crypto: blake2s: include as built-in") > Reported-by: Miles Chen <miles.chen@xxxxxxxxxxxx> > Tested-by: Miles Chen <miles.chen@xxxxxxxxxxxx> > Tested-by: Nathan Chancellor <nathan@xxxxxxxxxx> > Link: https://github.com/ClangBuiltLinux/linux/issues/1567 > Cc: Nick Desaulniers <ndesaulniers@xxxxxxxxxx> > Cc: Sami Tolvanen <samitolvanen@xxxxxxxxxx> > Cc: Ard Biesheuvel <ardb@xxxxxxxxxx> > Signed-off-by: Jason A. Donenfeld <Jason@xxxxxxxxx> > --- > Changes v1->v2: > - Wrapped columns at 80 for Eric. > > arch/arm/crypto/blake2s-shash.c | 4 ++-- > arch/x86/crypto/blake2s-shash.c | 4 ++-- > crypto/blake2s_generic.c | 4 ++-- > include/crypto/internal/blake2s.h | 40 +++++++++++++++++++------------ > lib/crypto/blake2s.c | 4 ++-- > 5 files changed, 33 insertions(+), 23 deletions(-) > > diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c > index 17c1c3bfe2f5..763c73beea2d 100644 > --- a/arch/arm/crypto/blake2s-shash.c > +++ b/arch/arm/crypto/blake2s-shash.c > @@ -13,12 +13,12 @@ > static int crypto_blake2s_update_arm(struct shash_desc *desc, > const u8 *in, unsigned int inlen) > { > - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); > + return crypto_blake2s_update(desc, in, inlen, false); > } > > static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) > { > - return crypto_blake2s_final(desc, out, blake2s_compress); > + return crypto_blake2s_final(desc, out, false); > } > > #define BLAKE2S_ALG(name, driver_name, digest_size) \ > diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c > index f9e2fecdb761..59ae28abe35c 100644 > --- a/arch/x86/crypto/blake2s-shash.c > +++ b/arch/x86/crypto/blake2s-shash.c > @@ -18,12 +18,12 @@ > static int crypto_blake2s_update_x86(struct shash_desc *desc, > const u8 *in, unsigned 
int inlen) > { > - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); > + return crypto_blake2s_update(desc, in, inlen, false); > } > > static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) > { > - return crypto_blake2s_final(desc, out, blake2s_compress); > + return crypto_blake2s_final(desc, out, false); > } > > #define BLAKE2S_ALG(name, driver_name, digest_size) \ > diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c > index 72fe480f9bd6..5f96a21f8788 100644 > --- a/crypto/blake2s_generic.c > +++ b/crypto/blake2s_generic.c > @@ -15,12 +15,12 @@ > static int crypto_blake2s_update_generic(struct shash_desc *desc, > const u8 *in, unsigned int inlen) > { > - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic); > + return crypto_blake2s_update(desc, in, inlen, true); > } > > static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out) > { > - return crypto_blake2s_final(desc, out, blake2s_compress_generic); > + return crypto_blake2s_final(desc, out, true); > } > > #define BLAKE2S_ALG(name, driver_name, digest_size) \ > diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h > index d39cfa0d333e..52363eee2b20 100644 > --- a/include/crypto/internal/blake2s.h > +++ b/include/crypto/internal/blake2s.h > @@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) > state->f[0] = -1; > } > > -typedef void (*blake2s_compress_t)(struct blake2s_state *state, > - const u8 *block, size_t nblocks, u32 inc); > - > /* Helper functions for BLAKE2s shared by the library and shash APIs */ > > -static inline void __blake2s_update(struct blake2s_state *state, > - const u8 *in, size_t inlen, > - blake2s_compress_t compress) > +static __always_inline void > +__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, > + bool force_generic) > { > const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; > > @@ -39,7 +36,12 @@ static inline void 
__blake2s_update(struct blake2s_state *state, > return; > if (inlen > fill) { > memcpy(state->buf + state->buflen, in, fill); > - (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); > + if (force_generic) > + blake2s_compress_generic(state, state->buf, 1, > + BLAKE2S_BLOCK_SIZE); > + else > + blake2s_compress(state, state->buf, 1, > + BLAKE2S_BLOCK_SIZE); > state->buflen = 0; > in += fill; > inlen -= fill; > @@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state, > if (inlen > BLAKE2S_BLOCK_SIZE) { > const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); > /* Hash one less (full) block than strictly possible */ > - (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); > + if (force_generic) > + blake2s_compress_generic(state, in, nblocks - 1, > + BLAKE2S_BLOCK_SIZE); > + else > + blake2s_compress(state, in, nblocks - 1, > + BLAKE2S_BLOCK_SIZE); > in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); > inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); > } > @@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state, > state->buflen += inlen; > } > > -static inline void __blake2s_final(struct blake2s_state *state, u8 *out, > - blake2s_compress_t compress) > +static __always_inline void > +__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) > { > blake2s_set_lastblock(state); > memset(state->buf + state->buflen, 0, > BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ > - (*compress)(state, state->buf, 1, state->buflen); > + if (force_generic) > + blake2s_compress_generic(state, state->buf, 1, state->buflen); > + else > + blake2s_compress(state, state->buf, 1, state->buflen); > cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); > memcpy(out, state->h, state->outlen); > } > @@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) > > static inline int crypto_blake2s_update(struct shash_desc *desc, > const u8 *in, unsigned int inlen, > - blake2s_compress_t compress) > + 
bool force_generic) > { > struct blake2s_state *state = shash_desc_ctx(desc); > > - __blake2s_update(state, in, inlen, compress); > + __blake2s_update(state, in, inlen, force_generic); > return 0; > } > > static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, > - blake2s_compress_t compress) > + bool force_generic) > { > struct blake2s_state *state = shash_desc_ctx(desc); > > - __blake2s_final(state, out, compress); > + __blake2s_final(state, out, force_generic); > return 0; > } > > diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c > index 9364f79937b8..c71c09621c09 100644 > --- a/lib/crypto/blake2s.c > +++ b/lib/crypto/blake2s.c > @@ -18,14 +18,14 @@ > > void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) > { > - __blake2s_update(state, in, inlen, blake2s_compress); > + __blake2s_update(state, in, inlen, false); > } > EXPORT_SYMBOL(blake2s_update); > > void blake2s_final(struct blake2s_state *state, u8 *out) > { > WARN_ON(IS_ENABLED(DEBUG) && !out); > - __blake2s_final(state, out, blake2s_compress); > + __blake2s_final(state, out, false); > memzero_explicit(state, sizeof(*state)); > } > EXPORT_SYMBOL(blake2s_final); > -- > 2.34.1 > -- Thanks, ~Nick Desaulniers