On Thu, 2023-08-24 at 22:32 +0200, Justin Iurman wrote: > Hello, > > I'm facing a verifier error and don't know how to make it happy (already > tried lots of checks). First, here is my env: > - OS: Ubuntu 22.04.3 LTS > - kernel: 5.15.0-79-generic x86_64 (CONFIG_DEBUG_INFO_BTF=y) > - clang version: 14.0.0-1ubuntu1.1 > - iproute2-5.15.0 with libbpf 0.5.0 > > And here is a simplified example of my program (basically, it will > insert in packets some bytes defined inside a map): > > #include "vmlinux.h" > #include <bpf/bpf_endian.h> > #include <bpf/bpf_helpers.h> > > #define MAX_BYTES 2048 > > struct xxx_t { > __u32 bytes_len; > __u8 bytes[MAX_BYTES]; > }; > > struct { > __uint(type, BPF_MAP_TYPE_ARRAY); > __uint(max_entries, 1); > __type(key, __u32); > __type(value, struct xxx_t); > __uint(pinning, LIBBPF_PIN_BY_NAME); > } my_map SEC(".maps"); > > char _license[] SEC("license") = "GPL"; > > SEC("egress") > int egress_handler(struct __sk_buff *skb) > { > void *data_end = (void *)(long)skb->data_end; > void *data = (void *)(long)skb->data; > struct ethhdr *eth = data; > struct ipv6hdr *ip6; > struct xxx_t *x; > __u32 offset; > __u32 idx = 0; > > offset = sizeof(*eth) + sizeof(*ip6); > if (data + offset > data_end) > return TC_ACT_OK; > > if (bpf_ntohs(eth->h_proto) != ETH_P_IPV6) > return TC_ACT_OK; > > x = bpf_map_lookup_elem(&my_map, &idx); > if (!x) > return TC_ACT_OK; > > if (x->bytes_len == 0 || x->bytes_len > MAX_BYTES) > return TC_ACT_OK; > > if (bpf_skb_adjust_room(skb, x->bytes_len, BPF_ADJ_ROOM_NET, 0)) > return TC_ACT_OK; > > if (bpf_skb_store_bytes(skb, offset, x->bytes, 8/*x->bytes_len*/, > BPF_F_RECOMPUTE_CSUM)) > return TC_ACT_SHOT; > > /* blah blah blah... */ > > return TC_ACT_OK; > } > > Let's focus on the line where bpf_skb_store_bytes is called. As is, with > a constant length (i.e., 8 for instance), the verifier is happy. > However, as soon as I try to use a map value as the length, it fails: > > [...] > ; if (bpf_skb_store_bytes(skb, offset, x->bytes, x->bytes_len, > 34: (bf) r1 = r7 > 35: (b7) r2 = 54 > 36: (bf) r3 = r8 > 37: (b7) r5 = 1 > 38: (85) call bpf_skb_store_bytes#9 > R0=inv0 R1_w=ctx(id=0,off=0,imm=0) R2_w=inv54 > R3_w=map_value(id=0,off=4,ks=4,vs=2052,imm=0) > R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R5_w=inv1 > R6_w=inv1 R7=ctx(id=0,off=0,imm=0) > R8_w=map_value(id=0,off=4,ks=4,vs=2052,imm=0) R10=fp0 fp-8=mmmm???? > invalid access to map value, value_size=2052 off=4 size=0 > R3 min value is outside of the allowed memory range > > I guess "size=0" is the problem here, but don't understand why. What > bothers me is that it looks like it's about R3 (i.e., x->bytes), not R4. > Anyway, I already tried to add a bunch of checks for both, but did not > succeed. Any idea? Hi Justin, John, The following fragment of C code: if (x->bytes_len == 0 || x->bytes_len > MAX_BYTES) return TC_ACT_OK; if (bpf_skb_adjust_room(skb, x->bytes_len, BPF_ADJ_ROOM_NET, 0)) return TC_ACT_OK; if (bpf_skb_store_bytes(skb, offset, x->bytes, x->bytes_len, BPF_F_RECOMPUTE_CSUM)) return TC_ACT_SHOT; Gets compiled to the following BPF: ; r8 is `x` at this point ; if (x->bytes_len == 0 || x->bytes_len > MAX_BYTES) 17: (61) r2 = *(u32 *)(r8 +0) ; R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) ; R8_w=map_value(off=0,ks=4,vs=2052,imm=0) 18: (bc) w1 = w2 ; R1_w=scalar(id=2,umax=4294967295,var_off=(0x0; 0xffffffff)) ; R2_w=scalar(id=2,umax=4294967295,var_off=(0x0; 0xffffffff)) 19: (04) w1 += -2049 ; R1_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) 20: (a6) if w1 < 0xfffff800 goto pc+16 ; R1_w=scalar(umin=4294965248,umax=4294967295, ; var_off=(0xfffff800; 0x7ff),s32_min=-2048,s32_max=-1) ; if (bpf_skb_adjust_room(skb, x->bytes_len, BPF_ADJ_ROOM_NET, 0)) 21: (bf) r1 = r6 ; R1_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0) 22: (b4) w3 = 0 ; R3_w=0 23: (b7) r4 = 0 ; R4_w=0 24: (85) call bpf_skb_adjust_room#50 ; R0=scalar() 25: (55) if r0 != 0x0 goto pc+11 ; R0=0 ; if (bpf_skb_store_bytes(skb, offset, x->bytes, x->bytes_len, 26: (61) r4 = *(u32 *)(r8 +0) ; R4_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) ; R8=map_value(off=0,ks=4,vs=2052,imm=0) 27: (07) r8 += 4 ; R8_w=map_value(off=4,ks=4,vs=2052,imm=0) 28: (bf) r1 = r6 ; R1_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0) 29: (b4) w2 = 54 ; R2_w=54 30: (bf) r3 = r8 ; R3_w=map_value(off=4,ks=4,vs=2052,imm=0) R8_w=map_value(off=4,ks=4,vs=2052,imm=0) 31: (b7) r5 = 1 ; R5_w=1 32: (85) call bpf_skb_store_bytes#9 Note the following instructions: - (17): x->bytes_len access; - (18,19,20): a curious way in which clang translates the (_ == 0 || _ > MAX_BYTES); - (26): x->bytes_len is re-read. Several observations: - because of (20) verifier can infer range for w1, but this range is not propagated to w2; - even if it was propagated verifier does not track range for values stored in "general memory", only for values in registers and values spilled to stack => known range for w2 does not imply known range for x->bytes_len. I can make it work with the following modification: int egress_handler(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; struct ethhdr *eth = data; struct ipv6hdr *ip6; struct xxx_t *x; __s32 bytes_len; // <------ signed ! __u32 offset; __u32 idx = 0; offset = sizeof(*eth) + sizeof(*ip6); if (data + offset > data_end) return TC_ACT_OK; if (bpf_ntohs(eth->h_proto) != ETH_P_IPV6) return TC_ACT_OK; x = bpf_map_lookup_elem(&my_map, &idx); if (!x) return TC_ACT_OK; bytes_len = x->bytes_len; // <----- use bytes_len everywhere below ! if (bytes_len <= 0 || bytes_len > MAX_BYTES) return TC_ACT_OK; if (bpf_skb_adjust_room(skb, bytes_len, BPF_ADJ_ROOM_NET, 0)) return TC_ACT_OK; if (bpf_skb_store_bytes(skb, offset, x->bytes, bytes_len, BPF_F_RECOMPUTE_CSUM)) return TC_ACT_SHOT; /* blah blah blah... */ return TC_ACT_OK; } After such change the following BPF is generated: ; bytes_len = x->bytes_len; 17: (61) r9 = *(u32 *)(r8 +0) ; R8_w=map_value(off=0,ks=4,vs=2052,imm=0) ; R9_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) ; if (bytes_len <= 0 || bytes_len > MAX_BYTES) 18: (c6) if w9 s< 0x1 goto pc+18 ; R9_w=scalar(umin=1,umax=2147483647,var_off=(0x0; 0x7fffffff)) 19: (66) if w9 s> 0x800 goto pc+17 ; R9_w=scalar(umin=1,umax=2048,var_off=(0x0; 0xfff)) ; if (bpf_skb_adjust_room(skb, bytes_len, BPF_ADJ_ROOM_NET, 0)) 20: (bf) r1 = r6 ; R1_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0) 21: (bc) w2 = w9 ; R2_w=scalar(id=2,umin=1,umax=2048,var_off=(0x0; 0xfff)) ; R9_w=scalar(id=2,umin=1,umax=2048,var_off=(0x0; 0xfff)) 22: (b4) w3 = 0 ; R3_w=0 23: (b7) r4 = 0 ; R4_w=0 24: (85) call bpf_skb_adjust_room#50 ; R0=scalar() 25: (55) if r0 != 0x0 goto pc+11 ; R0=0 ; if (bpf_skb_store_bytes(skb, offset, x->bytes, bytes_len, 26: (07) r8 += 4 ; R8_w=map_value(off=4,ks=4,vs=2052,imm=0) 27: (bf) r1 = r6 ; R1_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0) 28: (b4) w2 = 54 ; R2_w=54 29: (bf) r3 = r8 ; R3_w=map_value(off=4,ks=4,vs=2052,imm=0) ; R8_w=map_value(off=4,ks=4,vs=2052,imm=0) 30: (bc) w4 = w9 ; R4_w=scalar(id=2,umin=1,umax=2048,var_off=(0x0; 0xfff)) ; R9=scalar(id=2,umin=1,umax=2048,var_off=(0x0; 0xfff)) 31: (b7) r5 = 1 ; R5_w=1 32: (85) call bpf_skb_store_bytes#9 Note the following: - (17): x->bytes_len is in w9; - (18,19): range check is done w/o -= 2049 trick and verifier infers range for w9 as [1,2048]; - (30): w9 is reused as a parameter to bpf_skb_store_bytes with correct range. I think that main pain point here is "clever" (_ == 0 || _ > MAX_BYTES) translation, need to think a bit if it is possible to dissuade clang from such transformation (via change in clang). Alternatively, I think that doing (_ == 0 || _ > MAX_BYTES) check in inline assembly as two compare instructions should also work. Thanks, Eduard