On Wed, Sep 26, 2018 at 5:45 PM Jason A. Donenfeld <Jason@xxxxxxxxx> wrote:
> So what you have in mind is something like calling simd_relax() every
> 4096 bytes or so?

That was actually pretty easy, putting together both of your suggestions:

/*
 * Hand @len bytes at @src to chacha20_neon() or chacha20_arm(), with @dst
 * as the destination argument.
 *
 * The input is walked in pieces of at most PAGE_SIZE bytes, and
 * simd_relax() is called on @simd_context after every piece except the
 * final one.  A piece goes to chacha20_neon() only when
 * CONFIG_KERNEL_MODE_NEON is enabled, chacha20_use_neon is set, the piece
 * is at least three ChaCha20 blocks long and simd_use() succeeds; in every
 * other case chacha20_arm() handles it.
 *
 * After each piece, state->counter[0] is advanced by the number of 64-byte
 * blocks needed to cover that piece (rounded up).
 *
 * Always returns true.
 */
static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst, u8 *src,
				 size_t len, simd_context_t *simd_context)
{
	for (;;) {
		/* Never pass more than one page to a single call. */
		const size_t chunk = len > PAGE_SIZE ? PAGE_SIZE : len;

		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon &&
		    chunk >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context)) {
			chacha20_neon(dst, src, chunk, state->key,
				      state->counter);
		} else {
			chacha20_arm(dst, src, chunk, state->key,
				     state->counter);
		}
		state->counter[0] += (chunk + 63) / 64;

		/* The final piece is the one that consumes all that is left. */
		if (chunk == len)
			break;

		len -= chunk;
		src += chunk;
		dst += chunk;
		simd_relax(simd_context);
	}

	return true;
}