On Tue, Jul 09, 2024 at 07:08:31PM GMT, Alexei Starovoitov wrote:
> On Sun, Jun 30, 2024 at 10:59 PM Shung-Hsi Yu <shung-hsi.yu@xxxxxxxx> wrote:
> >
> > signed_add*_overflows() was added back when there was no overflow-check
> > helper. With the introduction of such helpers in commit f0907827a8a91
> > ("compiler.h: enable builtin overflow checkers and add fallback code"), we
> > can drop signed_add*_overflows() in kernel/bpf/verifier.c and use the
> > generic check_add_overflow() instead.
> >
> > This will make future refactoring easier, and takes advantage of
> > compiler-emitted hardware instructions that efficiently implement these
> > checks.
> >
> > After the change GCC 13.3.0 generates cleaner assembly on x86_64:
> >
> >         err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
> >    13625:  mov    0x28(%rbx),%r9   /* r9 = src_reg->smin_value */
> >    13629:  mov    0x30(%rbx),%rcx  /* rcx = src_reg->smax_value */
> >         ...
> >         if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
> >    141c1:  mov    %r9,%rax
> >    141c4:  add    0x28(%r12),%rax
> >    141c9:  mov    %rax,0x28(%r12)
> >    141ce:  jo     146e4 <adjust_reg_min_max_vals+0x1294>
> >             check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
> >    141d4:  add    0x30(%r12),%rcx
> >    141d9:  mov    %rcx,0x30(%r12)
> >         if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
> >    141de:  jo     146e4 <adjust_reg_min_max_vals+0x1294>
> >         ...
> >                 *dst_smin = S64_MIN;
> >    146e4:  movabs $0x8000000000000000,%rax
> >    146ee:  mov    %rax,0x28(%r12)
> >                 *dst_smax = S64_MAX;
> >    146f3:  sub    $0x1,%rax
> >    146f7:  mov    %rax,0x30(%r12)
> >
> > Before the change it gives:
> >
> >         s64 smin_val = src_reg->smin_value;
> >      675:  mov    0x28(%rsi),%r8
> >         s64 smax_val = src_reg->smax_value;
> >         u64 umin_val = src_reg->umin_value;
> >         u64 umax_val = src_reg->umax_value;
> >      679:  mov    %rdi,%rax        /* rax = dst_reg */
> >         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
> >      67c:  mov    0x28(%rdi),%rdi  /* rdi = dst_reg->smin_value */
> >         u64 umin_val = src_reg->umin_value;
> >      680:  mov    0x38(%rsi),%rdx
> >         u64 umax_val = src_reg->umax_value;
> >      684:  mov    0x40(%rsi),%rcx
> >         s64 res = (s64)((u64)a + (u64)b);
> >      688:  lea    (%r8,%rdi,1),%r9 /* r9 = dst_reg->smin_value + src_reg->smin_value */
> >         return res < a;
> >      68c:  cmp    %r9,%rdi
> >      68f:  setg   %r10b            /* r10b = (dst_reg->smin_value + src_reg->smin_value) > dst_reg->smin_value */
> >         if (b < 0)
> >      693:  test   %r8,%r8
> >      696:  js     72b <scalar_min_max_add+0xbb>
> >             signed_add_overflows(dst_reg->smax_value, smax_val)) {
> >                 dst_reg->smin_value = S64_MIN;
> >                 dst_reg->smax_value = S64_MAX;
> >      69c:  movabs $0x7fffffffffffffff,%rdi
> >         s64 smax_val = src_reg->smax_value;
> >      6a6:  mov    0x30(%rsi),%r8
> >                 dst_reg->smin_value = S64_MIN;
> >      6aa: 00 00 00        movabs $0x8000000000000000,%rsi
> >         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
> >      6b4:  test   %r10b,%r10b      /* (dst_reg->smin_value + src_reg->smin_value) > dst_reg->smin_value ? goto 6cb */
> >      6b7:  jne    6cb <scalar_min_max_add+0x5b>
> >             signed_add_overflows(dst_reg->smax_value, smax_val)) {
> >      6b9:  mov    0x30(%rax),%r10  /* r10 = dst_reg->smax_value */
> >         s64 res = (s64)((u64)a + (u64)b);
> >      6bd:  lea    (%r10,%r8,1),%r11 /* r11 = dst_reg->smax_value + src_reg->smax_value */
> >         if (b < 0)
> >      6c1:  test   %r8,%r8
> >      6c4:  js     71e <scalar_min_max_add+0xae>
> >         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
> >      6c6:  cmp    %r11,%r10        /* (dst_reg->smax_value + src_reg->smax_value) <= dst_reg->smax_value ? goto 723 */
> >      6c9:  jle    723 <scalar_min_max_add+0xb3>
> >         } else {
> >                 dst_reg->smin_value += smin_val;
> >                 dst_reg->smax_value += smax_val;
> >         }
> >      6cb:  mov    %rsi,0x28(%rax)
> >         ...
> >      6d5:  mov    %rdi,0x30(%rax)
> >         ...
> >         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
> >      71e:  cmp    %r11,%r10
> >      721:  jl     6cb <scalar_min_max_add+0x5b>
> >                 dst_reg->smin_value += smin_val;
> >      723:  mov    %r9,%rsi
> >                 dst_reg->smax_value += smax_val;
> >      726:  mov    %r11,%rdi
> >      729:  jmp    6cb <scalar_min_max_add+0x5b>
> >         return res > a;
> >      72b:  cmp    %r9,%rdi
> >      72e:  setl   %r10b
> >      732:  jmp    69c <scalar_min_max_add+0x2c>
> >      737:  nopw   0x0(%rax,%rax,1)
> >
> > Suggested-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> > Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@xxxxxxxx>
> > ---
> >  kernel/bpf/verifier.c | 94 +++++++++++++------------------------------
> >  1 file changed, 28 insertions(+), 66 deletions(-)
> >
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index d3927d819465..26c2b7527942 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -12725,26 +12725,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >         return 0;
> >  }
> >
> > -static bool signed_add_overflows(s64 a, s64 b)
> > -{
> > -       /* Do the add in u64, where overflow is well-defined */
> > -       s64 res = (s64)((u64)a + (u64)b);
> > -
> > -       if (b < 0)
> > -               return res > a;
> > -       return res < a;
> > -}
> > -
> > -static bool signed_add32_overflows(s32 a, s32 b)
> > -{
> > -       /* Do the add in u32, where overflow is well-defined */
> > -       s32 res = (s32)((u32)a + (u32)b);
> > -
> > -       if (b < 0)
> > -               return res > a;
> > -       return res < a;
> > -}
>
> Looks good,
> unfortunately it gained conflict while sitting in the queue.
> signed_add16_overflows() was introduced.
> Could you please respin replacing signed_add16_overflows()
> with check_add_overflow() ?

Ack, will respin and send a v3 patchset tomorrow.
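For anyone following along, here is a minimal sketch of what the
check_add_overflow()-based update in scalar_min_max_add() looks like,
reconstructed from the interleaved source lines in the "after" listing
above. It is only an illustration: the exact v3 code may differ, and the
unsigned-bounds handling shown at the end is meant to stand in for the
pre-existing logic, which this patch does not touch.

    static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
                                   struct bpf_reg_state *src_reg)
    {
            s64 *dst_smin = &dst_reg->smin_value;
            s64 *dst_smax = &dst_reg->smax_value;
            u64 umin_val = src_reg->umin_value;
            u64 umax_val = src_reg->umax_value;

            /* check_add_overflow(a, b, d) stores the wrapped sum in *d and
             * returns true if the signed addition overflowed, so each bound
             * is updated and validated in a single call; on overflow the
             * range is widened to the full s64 range.
             */
            if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
                check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
                    *dst_smin = S64_MIN;
                    *dst_smax = S64_MAX;
            }

            /* Unsigned bounds: unchanged by this patch, shown for context. */
            if (dst_reg->umin_value + umin_val < umin_val ||
                dst_reg->umax_value + umax_val < umax_val) {
                    dst_reg->umin_value = 0;
                    dst_reg->umax_value = U64_MAX;
            } else {
                    dst_reg->umin_value += umin_val;
                    dst_reg->umax_value += umax_val;
            }
    }

The 32-bit counterpart would follow the same pattern with s32 bounds and
S32_MIN/S32_MAX.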