[PATCH v3 bpf-next 7/8] bpf: smarter verifier log number printing logic

Andrii Nakryiko <andrii@xxxxxxxxxx> · Fri, 17 Nov 2023 19:46:22 -0800

Instead of always printing numbers as either decimals (and in some
cases, like for "imm=%llx", in hexadecimals), decide the form based on
actual values. For numbers in a reasonably small range (currently,
[0, U16_MAX] for unsigned values, and [S16_MIN, S16_MAX] for signed ones),
emit them as decimals. In all other cases, even for signed values,
emit them in hexadecimals.

For large values hex form is often times way more useful: it's easier to
see an exact difference between 0xffffffff80000000 and 0xffffffff7fffffff,
than between 18446744071562067966 and 18446744071562067967, as one
particular example.

Small values representing small pointer offsets or application
constants, on the other hand, are way more useful to be represented in
decimal notation.

Adjust reg_bounds register state parsing logic to take into account this
change.

Acked-by: Eduard Zingerman <eddyz87@xxxxxxxxx>
Acked-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
---
 kernel/bpf/log.c                              | 79 ++++++++++++++++---
 .../selftests/bpf/prog_tests/reg_bounds.c     | 53 ++++++++-----
 .../selftests/bpf/progs/exceptions_assert.c   | 32 ++++----
 3 files changed, 118 insertions(+), 46 deletions(-)

diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 20b4f81087da..87105aa482ed 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -509,10 +509,52 @@ static void print_liveness(struct bpf_verifier_env *env,
 		verbose(env, "D");
 }
 
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
+static bool is_unum_decimal(u64 num)
+{
+	return num <= UNUM_MAX_DECIMAL;
+}
+
+static bool is_snum_decimal(s64 num)
+{
+	return num >= SNUM_MIN_DECIMAL && num <= SNUM_MAX_DECIMAL;
+}
+
+static void verbose_unum(struct bpf_verifier_env *env, u64 num)
+{
+	if (is_unum_decimal(num))
+		verbose(env, "%llu", num);
+	else
+		verbose(env, "%#llx", num);
+}
+
+static void verbose_snum(struct bpf_verifier_env *env, s64 num)
+{
+	if (is_snum_decimal(num))
+		verbose(env, "%lld", num);
+	else
+		verbose(env, "%#llx", num);
+}
+
 static void print_scalar_ranges(struct bpf_verifier_env *env,
 				const struct bpf_reg_state *reg,
 				const char **sep)
 {
+	/* For signed ranges, we want to unify 64-bit and 32-bit values in the
+	 * output as much as possible, but there is a bit of a complication.
+	 * If we choose to print values as decimals, this is natural to do,
+	 * because negative 64-bit and 32-bit values >= -S32_MIN have the same
+	 * representation due to sign extension. But if we choose to print
+	 * them in hex format (see is_snum_decimal()), then sign extension is
+	 * misleading.
+	 * E.g., smin=-2 and smin32=-2 are exactly the same in decimal, but in
+	 * hex they will be smin=0xfffffffffffffffe and smin32=0xfffffffe, two
+	 * very different numbers.
+	 * So we avoid sign extension if we choose to print values in hex.
+	 */
 	struct {
 		const char *name;
 		u64 val;
@@ -522,8 +564,14 @@ static void print_scalar_ranges(struct bpf_verifier_env *env,
 		{"smax",   reg->smax_value,         reg->smax_value == S64_MAX},
 		{"umin",   reg->umin_value,         reg->umin_value == 0},
 		{"umax",   reg->umax_value,         reg->umax_value == U64_MAX},
-		{"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN},
-		{"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX},
+		{"smin32",
+		 is_snum_decimal((s64)reg->s32_min_value)
+			 ? (s64)reg->s32_min_value
+			 : (u32)reg->s32_min_value, reg->s32_min_value == S32_MIN},
+		{"smax32",
+		 is_snum_decimal((s64)reg->s32_max_value)
+			 ? (s64)reg->s32_max_value
+			 : (u32)reg->s32_max_value, reg->s32_max_value == S32_MAX},
 		{"umin32", reg->u32_min_value,      reg->u32_min_value == 0},
 		{"umax32", reg->u32_max_value,      reg->u32_max_value == U32_MAX},
 	}, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)];
@@ -549,7 +597,10 @@ static void print_scalar_ranges(struct bpf_verifier_env *env,
 			verbose(env, "%s=", m2->name);
 		}
 
-		verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val);
+		if (m1->name[0] == 's')
+			verbose_snum(env, m1->val);
+		else
+			verbose_unum(env, m1->val);
 	}
 }
 
@@ -576,14 +627,14 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s
 	    tnum_is_const(reg->var_off)) {
 		/* reg->off should be 0 for SCALAR_VALUE */
 		verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
-		verbose(env, "%lld", reg->var_off.value + reg->off);
+		verbose_snum(env, reg->var_off.value + reg->off);
 		return;
 	}
 /*
  * _a stands for append, was shortened to avoid multiline statements below.
  * This macro is used to output a comma separated list of attributes.
  */
-#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
+#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; })
 
 	verbose(env, "%s", reg_type_str(env, t));
 	if (base_type(t) == PTR_TO_BTF_ID)
@@ -602,14 +653,20 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s
 			  reg->map_ptr->key_size,
 			  reg->map_ptr->value_size);
 	}
-	if (t != SCALAR_VALUE && reg->off)
-		verbose_a("off=%d", reg->off);
-	if (type_is_pkt_pointer(t))
-		verbose_a("r=%d", reg->range);
+	if (t != SCALAR_VALUE && reg->off) {
+		verbose_a("off=");
+		verbose_snum(env, reg->off);
+	}
+	if (type_is_pkt_pointer(t)) {
+		verbose_a("r=");
+		verbose_unum(env, reg->range);
+	}
 	if (tnum_is_const(reg->var_off)) {
 		/* a pointer register with fixed offset */
-		if (reg->var_off.value)
-			verbose_a("imm=%llx", reg->var_off.value);
+		if (reg->var_off.value) {
+			verbose_a("imm=");
+			verbose_snum(env, reg->var_off.value);
+		}
 	} else {
 		print_scalar_ranges(env, reg, &sep);
 		if (!tnum_is_unknown(reg->var_off)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index 7a8b0bf0a7f8..fd4ab23e6f54 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -13,10 +13,13 @@
  */
 #define U64_MAX ((u64)UINT64_MAX)
 #define U32_MAX ((u32)UINT_MAX)
+#define U16_MAX ((u32)UINT_MAX)
 #define S64_MIN ((s64)INT64_MIN)
 #define S64_MAX ((s64)INT64_MAX)
 #define S32_MIN ((s32)INT_MIN)
 #define S32_MAX ((s32)INT_MAX)
+#define S16_MIN ((s16)0x80000000)
+#define S16_MAX ((s16)0x7fffffff)
 
 typedef unsigned long long ___u64;
 typedef unsigned int ___u32;
@@ -138,13 +141,17 @@ static enum num_t t_unsigned(enum num_t t)
 	}
 }
 
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
 static bool num_is_small(enum num_t t, u64 x)
 {
 	switch (t) {
-	case U64: return (u64)x <= 256;
-	case U32: return (u32)x <= 256;
-	case S64: return (s64)x >= -256 && (s64)x <= 256;
-	case S32: return (s32)x >= -256 && (s32)x <= 256;
+	case U64: return (u64)x <= UNUM_MAX_DECIMAL;
+	case U32: return (u32)x <= UNUM_MAX_DECIMAL;
+	case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL;
+	case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL;
 	default: printf("num_is_small!\n"); exit(1);
 	}
 }
@@ -1023,20 +1030,19 @@ static int parse_reg_state(const char *s, struct reg_state *reg)
 	 */
 	struct {
 		const char *pfx;
-		const char *fmt;
 		u64 *dst, def;
 		bool is_32, is_set;
 	} *f, fields[8] = {
-		{"smin=", "%lld", &reg->r[S64].a, S64_MIN},
-		{"smax=", "%lld", &reg->r[S64].b, S64_MAX},
-		{"umin=", "%llu", &reg->r[U64].a, 0},
-		{"umax=", "%llu", &reg->r[U64].b, U64_MAX},
-		{"smin32=", "%lld", &reg->r[S32].a, (u32)S32_MIN, true},
-		{"smax32=", "%lld", &reg->r[S32].b, (u32)S32_MAX, true},
-		{"umin32=", "%llu", &reg->r[U32].a, 0,            true},
-		{"umax32=", "%llu", &reg->r[U32].b, U32_MAX,      true},
+		{"smin=", &reg->r[S64].a, S64_MIN},
+		{"smax=", &reg->r[S64].b, S64_MAX},
+		{"umin=", &reg->r[U64].a, 0},
+		{"umax=", &reg->r[U64].b, U64_MAX},
+		{"smin32=", &reg->r[S32].a, (u32)S32_MIN, true},
+		{"smax32=", &reg->r[S32].b, (u32)S32_MAX, true},
+		{"umin32=", &reg->r[U32].a, 0,            true},
+		{"umax32=", &reg->r[U32].b, U32_MAX,      true},
 	};
-	const char *p, *fmt;
+	const char *p;
 	int i;
 
 	p = strchr(s, '=');
@@ -1050,8 +1056,13 @@ static int parse_reg_state(const char *s, struct reg_state *reg)
 		long long sval;
 		enum num_t t;
 
-		if (sscanf(p, "%lld", &sval) != 1)
-			return -EINVAL;
+		if (p[0] == '0' && p[1] == 'x') {
+			if (sscanf(p, "%llx", &sval) != 1)
+				return -EINVAL;
+		} else {
+			if (sscanf(p, "%lld", &sval) != 1)
+				return -EINVAL;
+		}
 
 		reg->valid = true;
 		for (t = first_t; t <= last_t; t++) {
@@ -1075,9 +1086,13 @@ static int parse_reg_state(const char *s, struct reg_state *reg)
 
 		if (mcnt) {
 			/* populate all matched fields */
-			fmt = fields[midxs[0]].fmt;
-			if (sscanf(p, fmt, &val) != 1)
-				return -EINVAL;
+			if (p[0] == '0' && p[1] == 'x') {
+				if (sscanf(p, "%llx", &val) != 1)
+					return -EINVAL;
+			} else {
+				if (sscanf(p, "%lld", &val) != 1)
+					return -EINVAL;
+			}
 
 			for (i = 0; i < mcnt; i++) {
 				f = &fields[midxs[i]];
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index 26f7d67432cc..49efaed143fc 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,48 +18,48 @@
 		return *(u64 *)num;					\
 	}
 
-__msg(": R0_w=-2147483648 R10=fp0")
+__msg(": R0_w=0xffffffff80000000 R10=fp0")
 check_assert(s64, eq, int_min, INT_MIN);
-__msg(": R0_w=2147483647 R10=fp0")
+__msg(": R0_w=0x7fffffff R10=fp0")
 check_assert(s64, eq, int_max, INT_MAX);
 __msg(": R0_w=0 R10=fp0")
 check_assert(s64, eq, zero, 0);
-__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0")
+__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0")
 check_assert(s64, eq, llong_min, LLONG_MIN);
-__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0")
+__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0")
 check_assert(s64, eq, llong_max, LLONG_MAX);
 
-__msg(": R0_w=scalar(smax=2147483646) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0")
 check_assert(s64, lt, pos, INT_MAX);
-__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
 check_assert(s64, lt, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
 check_assert(s64, lt, neg, INT_MIN);
 
-__msg(": R0_w=scalar(smax=2147483647) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0")
 check_assert(s64, le, pos, INT_MAX);
 __msg(": R0_w=scalar(smax=0) R10=fp0")
 check_assert(s64, le, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
 check_assert(s64, le, neg, INT_MIN);
 
-__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
 check_assert(s64, gt, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
 check_assert(s64, gt, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483647) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0")
 check_assert(s64, gt, neg, INT_MIN);
 
-__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
 check_assert(s64, ge, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
+__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
 check_assert(s64, ge, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483648) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0")
 check_assert(s64, ge, neg, INT_MIN);
 
 SEC("?tc")
 __log_level(2) __failure
-__msg(": R0=0 R1=ctx() R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0")
+__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
 int check_assert_range_s64(struct __sk_buff *ctx)
 {
 	struct bpf_sock *sk = ctx->sk;
-- 
2.34.1