On Sun, Jun 23, 2024 at 08:38:44PM GMT, Alexei Starovoitov wrote: > On Sun, Jun 23, 2024 at 12:03 AM Shung-Hsi Yu <shung-hsi.yu@xxxxxxxx> wrote: > > signed_add*_overflows() was added back when there was no overflow-check > > helper. With the introduction of such helpers in commit f0907827a8a91 > > ("compiler.h: enable builtin overflow checkers and add fallback code"), we > > can drop signed_add*_overflows() in kernel/bpf/verifier.c and use the > > generic check_add_overflow() instead. > > > > This will make future refactoring easier, and possibly taking advantage of > > compiler-emitted hardware instructions that efficiently implement these > > checks. > > > > Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@xxxxxxxx> > > --- > > shung-hsi.yu: maybe there's a better name instead of {min,max}_cur, but > > I couldn't come up with one. > > Just smin/smax without _cur suffix ? Going with Jiri's suggestion under patch 2 to drop the new variables instead. > What is the asm before/after ? Tested with only this patch applied and compiled with GCC 13.3.0 for x86_64. 
x86 reading is difficult for me, but I think the relevant changes for signed addition are: Before: s64 smin_val = src_reg->smin_value; 675: 4c 8b 46 28 mov 0x28(%rsi),%r8 { 679: 48 89 f8 mov %rdi,%rax s64 smax_val = src_reg->smax_value; u64 umin_val = src_reg->umin_value; u64 umax_val = src_reg->umax_value; if (signed_add_overflows(dst_reg->smin_value, smin_val) || 67c: 48 8b 7f 28 mov 0x28(%rdi),%rdi u64 umin_val = src_reg->umin_value; 680: 48 8b 56 38 mov 0x38(%rsi),%rdx u64 umax_val = src_reg->umax_value; 684: 48 8b 4e 40 mov 0x40(%rsi),%rcx s64 res = (s64)((u64)a + (u64)b); 688: 4d 8d 0c 38 lea (%r8,%rdi,1),%r9 return res < a; 68c: 4c 39 cf cmp %r9,%rdi 68f: 41 0f 9f c2 setg %r10b if (b < 0) 693: 4d 85 c0 test %r8,%r8 696: 0f 88 8f 00 00 00 js 72b <scalar_min_max_add+0xbb> signed_add_overflows(dst_reg->smax_value, smax_val)) { dst_reg->smin_value = S64_MIN; dst_reg->smax_value = S64_MAX; 69c: 48 bf ff ff ff ff ff movabs $0x7fffffffffffffff,%rdi 6a3: ff ff 7f s64 smax_val = src_reg->smax_value; 6a6: 4c 8b 46 30 mov 0x30(%rsi),%r8 dst_reg->smin_value = S64_MIN; 6aa: 48 be 00 00 00 00 00 movabs $0x8000000000000000,%rsi 6b1: 00 00 80 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 6b4: 45 84 d2 test %r10b,%r10b 6b7: 75 12 jne 6cb <scalar_min_max_add+0x5b> signed_add_overflows(dst_reg->smax_value, smax_val)) { 6b9: 4c 8b 50 30 mov 0x30(%rax),%r10 s64 res = (s64)((u64)a + (u64)b); 6bd: 4f 8d 1c 02 lea (%r10,%r8,1),%r11 if (b < 0) 6c1: 4d 85 c0 test %r8,%r8 6c4: 78 58 js 71e <scalar_min_max_add+0xae> if (signed_add_overflows(dst_reg->smin_value, smin_val) || 6c6: 4d 39 da cmp %r11,%r10 6c9: 7e 58 jle 723 <scalar_min_max_add+0xb3> 6cb: 48 89 70 28 mov %rsi,0x28(%rax) ... 
if (signed_add_overflows(dst_reg->smin_value, smin_val) || 71e: 4d 39 da cmp %r11,%r10 721: 7c a8 jl 6cb <scalar_min_max_add+0x5b> dst_reg->smin_value += smin_val; 723: 4c 89 ce mov %r9,%rsi dst_reg->smax_value += smax_val; 726: 4c 89 df mov %r11,%rdi 729: eb a0 jmp 6cb <scalar_min_max_add+0x5b> return res > a; 72b: 4c 39 cf cmp %r9,%rdi 72e: 41 0f 9c c2 setl %r10b 732: e9 65 ff ff ff jmp 69c <scalar_min_max_add+0x2c> 737: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 73e: 00 00 740: 90 nop After: if (check_add_overflow(dst_reg->smin_value, smin_val, &smin) || 142d3: 4c 89 de mov %r11,%rsi 142d6: 49 03 74 24 28 add 0x28(%r12),%rsi 142db: 41 89 54 24 54 mov %edx,0x54(%r12) dst_reg->smax_value = S64_MAX; 142e0: 48 ba ff ff ff ff ff movabs $0x7fffffffffffffff,%rdx 142e7: ff ff 7f 142ea: 41 89 44 24 50 mov %eax,0x50(%r12) dst_reg->smin_value = S64_MIN; 142ef: 48 b8 00 00 00 00 00 movabs $0x8000000000000000,%rax 142f6: 00 00 80 if (check_add_overflow(dst_reg->smin_value, smin_val, &smin) || 142f9: 70 27 jo 14322 <adjust_reg_min_max_vals+0xde2> check_add_overflow(dst_reg->smax_value, smax_val, &smax)) { 142fb: 49 03 4c 24 30 add 0x30(%r12),%rcx 14300: 0f 90 c0 seto %al 14303: 48 89 ca mov %rcx,%rdx 14306: 0f b6 c0 movzbl %al,%eax dst_reg->smax_value = S64_MAX; 14309: 48 85 c0 test %rax,%rax 1430c: 48 b8 ff ff ff ff ff movabs $0x7fffffffffffffff,%rax 14313: ff ff 7f 14316: 48 0f 45 d0 cmovne %rax,%rdx 1431a: 48 8d 40 01 lea 0x1(%rax),%rax 1431e: 48 0f 44 c6 cmove %rsi,%rax Also uploaded complete disassembly for before[1] and after[2]. On a brief look it seems before this change it does lea, cmp, setg with a bit more branching, and after this change it uses add, seto, cmove/cmovne. Will look a bit more into this. 1: https://pastebin.com/NPkG1Ydd 2: https://pastebin.com/v9itLFnp