On 19/07/2024 09:17, Shung-Hsi Yu wrote: > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 8da132a1ef28..f6827f9e2076 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -13466,6 +13466,40 @@ static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, > } > } > > +/* Clears all trailing bits after the most significant unset bit. > + * > + * Used for estimating the minimum possible value after BPF_AND. This > + * effectively rounds a negative value down to a negative power-of-2 value > + * (except for -1, which just return -1) and returning 0 for non-negative > + * values. E.g. negative32_bit_floor(0xff0ff0ff) == 0xff000000. > + */ > +static inline s32 negative32_bit_floor(s32 v) > +{ > + u8 bits = fls(~v); /* find most-significant unset bit */ > + u32 delta; > + > + /* special case, needed because 1UL << 32 is undefined */ > + if (bits > 31) > + return 0; If I'm understanding correctly: this case happens when the input is nonnegative: v >= 0 means ~v's msb is set, so fls(~v) = 32. Might be worth calling that out in the comment. > + > + delta = (1UL << bits) - 1; > + return ~delta; > +} > + > +/* Same as negative32_bit_floor() above, but for 64-bit signed value */ > +static inline s64 negative_bit_floor(s64 v) > +{ > + u8 bits = fls64(~v); /* find most-significant unset bit */ > + u64 delta; > + > + /* special case, needed because 1ULL << 64 is undefined */ > + if (bits > 63) > + return 0; > + > + delta = (1ULL << bits) - 1; > + return ~delta; > +} > + > static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, > struct bpf_reg_state *src_reg) > { > @@ -13485,16 +13519,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, > dst_reg->u32_min_value = var32_off.value; > dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val); > > - /* Safe to set s32 bounds by casting u32 result into s32 when u32 > - * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. > - */ > - if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) { > - dst_reg->s32_min_value = dst_reg->u32_min_value; > - dst_reg->s32_max_value = dst_reg->u32_max_value; > - } else { > - dst_reg->s32_min_value = S32_MIN; > - dst_reg->s32_max_value = S32_MAX; > - } > + /* Handle the [-1, 0] & -CONSTANT case that's difficult for tnum */ > + dst_reg->s32_min_value = negative32_bit_floor(min(dst_reg->s32_min_value, > + src_reg->s32_min_value)); > + dst_reg->s32_max_value = max(dst_reg->s32_max_value, src_reg->s32_max_value); Either comment or commit message could maybe point out that the work the deleted code was doing (propagating u32 bounds into s32) is done by the caller later via __reg_deduce_bounds(). It's a bit of a shame that we can't get the sharp bound [-13, 0] in your example, for which we technically have the information we need — src_reg being constant means its tnum carries information that negative_bit_floor(smin_value) is throwing away — but I don't see an efficient way to handle the case (it happens basically because only one operand crosses the sign boundary, so the other operand's tnum is informative) without going down the range-splitting road you (I think rightly) discarded as unnecessarily complex. Apart from that this all LGTM, so: Reviewed-by: Edward Cree <ecree.xilinx@xxxxxxxxx>