On Mon, Apr 3, 2023 at 10:29 AM Ilya Leoshkevich <iii@xxxxxxxxxxxxx> wrote: > > test_ksyms_module fails to emit a kfunc call targeting a module on > s390x, because the verifier stores the difference between kfunc > address and __bpf_call_base in bpf_insn.imm, which is s32, and modules > are roughly (1 << 42) bytes away from the kernel on s390x. > > Fix by keeping BTF id in bpf_insn.imm for BPF_PSEUDO_KFUNC_CALLs, > and storing the absolute address in bpf_kfunc_desc. > > Introduce bpf_jit_supports_far_kfunc_call() in order to limit this new > behavior to the s390x JIT. Otherwise other JITs need to be modified, > which is not desired. > > Introduce bpf_get_kfunc_addr() instead of exposing both > find_kfunc_desc() and struct bpf_kfunc_desc. > > In addition to sorting kfuncs by imm, also sort them by offset, in > order to handle conflicting imms from different modules. Do this on > all architectures in order to simplify code. > > Factor out resolving specialized kfuncs (XPD and dynptr) from > fixup_kfunc_call(). This was required in the first place, because > fixup_kfunc_call() uses find_kfunc_desc(), which returns a const > pointer, so it's not possible to modify kfunc addr without stripping > const, which is not nice. It also removes repetition of code like: > > if (bpf_jit_supports_far_kfunc_call()) > desc->addr = func; > else > insn->imm = BPF_CALL_IMM(func); > > and separates kfunc_desc_tab fixups from kfunc_call fixups. > > Suggested-by: Jiri Olsa <olsajiri@xxxxxxxxx> > Signed-off-by: Ilya Leoshkevich <iii@xxxxxxxxxxxxx> > --- > > v3: https://lore.kernel.org/bpf/20230222223714.80671-1-iii@xxxxxxxxxxxxx/ > v3 -> v4: Use Jiri's proposal and make it work on s390x. > > arch/s390/net/bpf_jit_comp.c | 5 ++ > include/linux/bpf.h | 2 + > include/linux/filter.h | 1 + > kernel/bpf/core.c | 11 ++++ > kernel/bpf/verifier.c | 116 +++++++++++++++++++++++++---------- > 5 files changed, 101 insertions(+), 34 deletions(-) > [...] 
> @@ -2452,6 +2453,11 @@ struct bpf_kfunc_btf { > }; > > struct bpf_kfunc_desc_tab { > + /* Sorted by func_id (BTF ID) and offset (fd_array offset) during > + * verification. JITs do lookups by bpf_insn, where func_id may not be > + * available, therefore at the end of verification do_misc_fixups() > + * sorts this by imm and offset. > + */ > struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; > u32 nr_descs; > }; > @@ -2492,6 +2498,19 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) > sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); > } > > +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, u16 offset, nit: offset is so generic, if it's fd array index, should we call it that: btf_fd_idx or something like that? > + u8 **func_addr) > +{ > + const struct bpf_kfunc_desc *desc; > + > + desc = find_kfunc_desc(prog, func_id, offset); > + if (!desc) > + return -EFAULT; > + > + *func_addr = (u8 *)desc->addr; > + return 0; > +} > + > static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, > s16 offset) > { > @@ -2672,7 +2691,8 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) > return -EINVAL; > } > > - call_imm = BPF_CALL_IMM(addr); > + call_imm = bpf_jit_supports_far_kfunc_call() ? func_id : > + BPF_CALL_IMM(addr); > /* Check whether or not the relative offset overflows desc->imm */ > if ((unsigned long)(s32)call_imm != call_imm) { > verbose(env, "address of kernel function %s is out of range\n", this check makes no sense and would have misleading message if bpf_jit_supports_far_kfunc_call(), so how about actually making it a proper if/else and doing check only if call_imm is a 32-bit address offset? 
> @@ -2690,6 +2710,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) > desc->func_id = func_id; > desc->imm = call_imm; > desc->offset = offset; > + desc->addr = addr; > err = btf_distill_func_proto(&env->log, desc_btf, > func_proto, func_name, > &desc->func_model); > @@ -2699,19 +2720,15 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) > return err; > } > > -static int kfunc_desc_cmp_by_imm(const void *a, const void *b) > +static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b) > { > const struct bpf_kfunc_desc *d0 = a; > const struct bpf_kfunc_desc *d1 = b; > > - if (d0->imm > d1->imm) > - return 1; > - else if (d0->imm < d1->imm) > - return -1; > - return 0; > + return d0->imm - d1->imm ?: d0->offset - d1->offset; very succinct, but does it work for all possible inputs? let's see: $ cat t.c #include <stdio.h> #include <limits.h> int main() { int x1 = INT_MAX; int x2 = INT_MIN; int d1 = x1 - x2; int d2 = x2 - x1; printf("x1 %d x2 %d d1 %d d2 %d\n", x1, x2, d1, d2); return 0; } $ cc t.c && ./a.out x1 2147483647 x2 -2147483648 d1 -1 d2 1 I believe d1 should be positive and d2 should be negative, though, right? So maybe let's not be too clever here and just do: if (d0->imm != d1->imm) return d0->imm < d1->imm ? -1 : 1; if (d0->offset != d1->offset) return d0->offset < d1->offset ? -1 : 1; return 0; Still succinct enough, I think. > } > > -static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) > +static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog) > { > struct bpf_kfunc_desc_tab *tab; > [...] 
> static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > struct bpf_insn *insn_buf, int insn_idx, int *cnt) > { > const struct bpf_kfunc_desc *desc; > - void *xdp_kfunc; > > if (!insn->imm) { > verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); > @@ -17306,16 +17372,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > > *cnt = 0; > > - if (bpf_dev_bound_kfunc_id(insn->imm)) { > - xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm); > - if (xdp_kfunc) { > - insn->imm = BPF_CALL_IMM(xdp_kfunc); > - return 0; > - } > - > - /* fallback to default kfunc when not supported by netdev */ > - } > - > /* insn->imm has the btf func_id. Replace it with > * an address (relative to __bpf_call_base). update comment mentioning bpf_jit_supports_far_kfunc_call() ? > */ > @@ -17326,7 +17382,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > return -EFAULT; > } > > - insn->imm = desc->imm; > + if (!bpf_jit_supports_far_kfunc_call()) > + insn->imm = BPF_CALL_IMM(desc->addr); > if (insn->off) > return 0; > if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { > @@ -17351,17 +17408,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { > insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); > *cnt = 1; > - } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { > - bool seen_direct_write = env->seen_direct_write; > - bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); > - > - if (is_rdonly) > - insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly); > - > - /* restore env->seen_direct_write to its original value, since > - * may_access_direct_pkt_data mutates it > - */ > - env->seen_direct_write = seen_direct_write; > } > return 0; > } > @@ -17384,6 +17430,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) > 
struct bpf_map *map_ptr; > int i, ret, cnt, delta = 0; > > + fixup_kfunc_desc_tab(env); > + > for (i = 0; i < insn_cnt; i++, insn++) { > /* Make divide-by-zero exceptions impossible. */ > if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || > @@ -17891,7 +17939,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) > } > } > > - sort_kfunc_descs_by_imm(env->prog); > + sort_kfunc_descs_by_imm_off(env->prog); > > return 0; > } > -- > 2.39.2 >