Clang-9 makes some different inlining decisions compared to gcc, which leads to a warning about a possible stack overflow problem when building with CONFIG_KASAN, including when setting asan-stack=0, which avoids most other frame overflow warnings: drivers/media/platform/vicodec/codec-fwht.c:673:12: error: stack frame size of 2224 bytes in function 'encode_plane' Manually adding noinline_for_stack annotations in those functions called by encode_plane() or decode_plane() that require a significant amount of kernel stack ensures that this cannot happen with any compiler. Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx> --- drivers/media/platform/vicodec/codec-fwht.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c index d1d6085da9f1..135d56bcc2c5 100644 --- a/drivers/media/platform/vicodec/codec-fwht.c +++ b/drivers/media/platform/vicodec/codec-fwht.c @@ -47,7 +47,7 @@ static const uint8_t zigzag[64] = { }; -static int rlc(const s16 *in, __be16 *output, int blocktype) +static int noinline_for_stack rlc(const s16 *in, __be16 *output, int blocktype) { s16 block[8 * 8]; s16 *wp = block; @@ -106,8 +106,8 @@ static int rlc(const s16 *in, __be16 *output, int blocktype) * This function will worst-case increase rlc_in by 65*2 bytes: * one s16 value for the header and 8 * 8 coefficients of type s16. */ -static u16 derlc(const __be16 **rlc_in, s16 *dwht_out, - const __be16 *end_of_input) +static noinline_for_stack u16 +derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input) { /* header */ const __be16 *input = *rlc_in; @@ -373,7 +373,8 @@ static void fwht(const u8 *block, s16 *output_block, unsigned int stride, * Furthermore values can be negative... 
This is just a version that * works with 16 signed data */ -static void fwht16(const s16 *block, s16 *output_block, int stride, int intra) +static void noinline_for_stack +fwht16(const s16 *block, s16 *output_block, int stride, int intra) { /* we'll need more than 8 bits for the transformed coefficients */ s32 workspace1[8], workspace2[8]; @@ -456,7 +457,8 @@ static void fwht16(const s16 *block, s16 *output_block, int stride, int intra) } } -static void ifwht(const s16 *block, s16 *output_block, int intra) +static noinline_for_stack void +ifwht(const s16 *block, s16 *output_block, int intra) { /* * we'll need more than 8 bits for the transformed coefficients @@ -604,9 +606,9 @@ static int var_inter(const s16 *old, const s16 *new) return ret; } -static int decide_blocktype(const u8 *cur, const u8 *reference, - s16 *deltablock, unsigned int stride, - unsigned int input_step) +static noinline_for_stack int +decide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock, + unsigned int stride, unsigned int input_step) { s16 tmp[64]; s16 old[64]; -- 2.20.0