On Wed, Apr 6, 2022 at 5:12 PM Nikolay Borisov <nborisov@xxxxxxxx> wrote: > > Hello, > > I get a validator error with the following function: > > #define MAX_PERCPU_BUFSIZE (1<<15) > #define PATH_MAX 4096 > #define MAX_PATH_COMPONENTS 20 > #define IDX(x) ((x) & (MAX_PERCPU_BUFSIZE-1)) > > struct buf_t { > u8 buf[MAX_PERCPU_BUFSIZE]; > }; > > struct { > __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); > __uint(max_entries, 1); > __type(key, u32); > __type(value, struct buf_t); > } buf_map SEC(".maps"); > > static __always_inline u32 get_dentry_path_str(struct dentry* dentry, > struct buf_t *string_p) > { > const char slash = '/'; > const int zero = 0; > > u32 buf_off = MAX_PERCPU_BUFSIZE - 1; > > #pragma unroll > for (int i = 0; i < MAX_PATH_COMPONENTS; i++) { > struct dentry *d_parent = BPF_CORE_READ(dentry, d_parent); > if (dentry == d_parent) { > break; > } > // Add this dentry name to path > struct qstr d_name = BPF_CORE_READ(dentry, d_name); > // Ensure path is no longer than PATH_MAX-1 and copy the terminating NULL > unsigned int len = (d_name.len+1) & (PATH_MAX-1); > // Start writing from the end of the buffer > unsigned int off = buf_off - len; > // Is string buffer big enough for dentry name? > int sz = 0; > // verify no wrap occurred > if (off <= buf_off) > sz = bpf_probe_read_kernel_str(&string_p->buf[IDX(off)], len, (void *)d_name.name); > else > break; > > if (sz > 1) { > buf_off -= 1; // replace null byte termination with slash sign > bpf_probe_read(&(string_p->buf[IDX(buf_off)]), 1, &slash); > buf_off -= sz - 1; > } else { > // If sz is 0 or 1 we have an error (path can't be null nor an empty string) > break; > } > dentry = d_parent; > } > > // Add leading slash > buf_off -= 1; > bpf_probe_read(&(string_p->buf[IDX(buf_off)]), 1, &slash); > // Null terminate the path string, this replaces the final / with a null > // char > bpf_probe_read(&(string_p->buf[MAX_PERCPU_BUFSIZE-1]), 1, &zero); > return buf_off; > } > > Here are the last couple of instructions where off is being calculated. > > ; unsigned int len = (d_name.len+1) & (PATH_MAX-1); > 54: (57) r9 &= 4095 ; R9_w=inv(id=0,umax_value=4095,var_off=(0x0; 0xfff)) > ; unsigned int off = buf_off - len; > 55: (bf) r8 = r9 ; R8_w=inv(id=4,umax_value=4095,var_off=(0x0; 0xfff)) R9_w=inv(id=4,umax_value=4095,var_off=(0x0; 0xfff)) > 56: (a7) r8 ^= 32767 ; R8_w=inv(id=0,umin_value=28672,umax_value=32767,var_off=(0x7000; 0xfff)) > ; sz = bpf_probe_read_kernel_str(&string_p->buf[IDX(off)], len, (void *)d_name.name); > 57: (79) r6 = *(u64 *)(r10 -72) ; R6_w=map_value(id=0,off=0,ks=4,vs=32768,imm=0) R10=fp0 > 58: (0f) r6 += r8 ; R6_w=map_value(id=0,off=0,ks=4,vs=32768,umin_value=28672,umax_value=32767,var_off=(0x7000; 0xfff)) R8_w=invP(id=0,umin_value=28672,umax_value=32767,var_off=(0x7000; 0xfff)) > ; sz = bpf_probe_read_kernel_str(&string_p->buf[IDX(off)], len, (void *)d_name.name); > 59: (79) r3 = *(u64 *)(r1 +8) ; R1_w=fp-24 R3_w=inv(id=0) > ; sz = bpf_probe_read_kernel_str(&string_p->buf[IDX(off)], len, (void *)d_name.name); > 60: (bf) r1 = r6 ; R1_w=map_value(id=0,off=0,ks=4,vs=32768,umin_value=28672,umax_value=32767,var_off=(0x7000; 0xfff)) R6_w=map_value(id=0,off=0,ks=4,vs=32768,umin_value=28672,umax_value=32767,var_off=(0x7000; 0xfff)) > 61: (bf) r2 = r9 ; R2_w=inv(id=4,umax_value=4095,var_off=(0x0; 0xfff)) R9_w=inv(id=4,umax_value=4095,var_off=(0x0; 0xfff)) > 62: (85) call bpf_probe_read_kernel_str#115 > invalid access to map value, value_size=32768 off=32767 size=4095 > R1 max value is outside of the allowed memory range > > > From what I gathered it seems that in the bpf_probe_read_kernel_str the validator is not > able to prove that off+len is always going to be at most MAX_PERCPU_BUFSIZE - 1 which is well within > the bounds of the buffer. My logic goes as following: > > buf_off is first set to 32767, we get the first dentry and calculate its len + null terminating char which is going to be at most > 4095, afterwards 'off' is being calculated as buf_off - len and finally access to the percpu array is performed via IDX(off) which guarantees the access is > bounded by MAX_PERCPU_BUFSIZE - 1. IDX(off) is bounded, but verifier needs to be sure that `off + len` never goes beyond map value. so you should make sure and prove off <= MAX_PERCPU_BUFSIZE - PATH_MAX. Verifier actually caught a real bug for you. > > This code was originally based off https://github.com/aquasecurity/tracee/blob/main/pkg/ebpf/c/tracee.bpf.c#L1721-L1777 however it seems > that the way tracee author work around this is to simply start from the middle of the buffer, i.e set buf_off initially to MAX_PERCPU_BUFSIZE>>1 and adjust the > array accesses accordingly when doing the masking.