Re: crypto/blake2b_generic.c:245:1: warning: the frame size of 1220 bytes is larger than 1024 bytes

David Sterba <dsterba@xxxxxxx> · Tue, 3 Dec 2019 14:16:38 +0100

On Sun, Dec 01, 2019 at 05:54:53AM +0800, kbuild test robot wrote:
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
> head:   32ef9553635ab1236c33951a8bd9b5af1c3b1646
> commit: 91d689337fe8b7703608a2ec39aae700b99f3933 crypto: blake2b - add blake2b generic implementation
> date:   4 weeks ago
> config: arc-randconfig-a0031-20191201 (attached as .config)
> compiler: arc-elf-gcc (GCC) 7.4.0
> reproduce:
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         git checkout 91d689337fe8b7703608a2ec39aae700b99f3933
>         # save the attached .config to linux build tree
>         GCC_VERSION=7.4.0 make.cross ARCH=arc 

So this is for ARC.

> If you fix the issue, kindly add following tag
> Reported-by: kbuild test robot <lkp@xxxxxxxxx>
> 
> All warnings (new ones prefixed by >>):
> 
>    crypto/blake2b_generic.c: In function 'blake2b_compress':
> >> crypto/blake2b_generic.c:245:1: warning: the frame size of 1220 bytes is larger than 1024 bytes [-Wframe-larger-than=]

1220 bytes looks like a lot. On x86_64, blake2b_compress needs only 288
bytes, which roughly matches the declarations plus the effects of
inlining (2 x 16 x sizeof(u64) is 256).

I'm not familiar with ARC limitations regarding e.g. 64-bit types, so my
first guess would be that it requires more temporary stack space than
other arches that can handle u64 just fine.

>     }
>     ^
> 
> vim +245 crypto/blake2b_generic.c
> 
>    183	
>    184	#define G(r,i,a,b,c,d)                                  \
>    185		do {                                            \
>    186			a = a + b + m[blake2b_sigma[r][2*i+0]]; \
>    187			d = ror64(d ^ a, 32);                   \
>    188			c = c + d;                              \
>    189			b = ror64(b ^ c, 24);                   \
>    190			a = a + b + m[blake2b_sigma[r][2*i+1]]; \
>    191			d = ror64(d ^ a, 16);                   \
>    192			c = c + d;                              \
>    193			b = ror64(b ^ c, 63);                   \
>    194		} while (0)
>    195	
>    196	#define ROUND(r)                                \
>    197		do {                                    \
>    198			G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
>    199			G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
>    200			G(r,2,v[ 2],v[ 6],v[10],v[14]); \
>    201			G(r,3,v[ 3],v[ 7],v[11],v[15]); \
>    202			G(r,4,v[ 0],v[ 5],v[10],v[15]); \
>    203			G(r,5,v[ 1],v[ 6],v[11],v[12]); \
>    204			G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
>    205			G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
>    206		} while (0)
>    207	
>    208	static void blake2b_compress(struct blake2b_state *S,
>    209				     const u8 block[BLAKE2B_BLOCKBYTES])
>    210	{
>    211		u64 m[16];
>    212		u64 v[16];
>    213		size_t i;
>    214	
>    215		for (i = 0; i < 16; ++i)
>    216			m[i] = get_unaligned_le64(block + i * sizeof(m[i]));
>    217	
>    218		for (i = 0; i < 8; ++i)
>    219			v[i] = S->h[i];
>    220	
>    221		v[ 8] = blake2b_IV[0];
>    222		v[ 9] = blake2b_IV[1];
>    223		v[10] = blake2b_IV[2];
>    224		v[11] = blake2b_IV[3];
>    225		v[12] = blake2b_IV[4] ^ S->t[0];
>    226		v[13] = blake2b_IV[5] ^ S->t[1];
>    227		v[14] = blake2b_IV[6] ^ S->f[0];
>    228		v[15] = blake2b_IV[7] ^ S->f[1];
>    229	
>    230		ROUND(0);
>    231		ROUND(1);
>    232		ROUND(2);
>    233		ROUND(3);
>    234		ROUND(4);
>    235		ROUND(5);
>    236		ROUND(6);
>    237		ROUND(7);
>    238		ROUND(8);
>    239		ROUND(9);
>    240		ROUND(10);
>    241		ROUND(11);
>    242	
>    243		for (i = 0; i < 8; ++i)
>    244			S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
>  > 245	}

(rest of mail kept for reference)