On Thu, Apr 13, 2023 at 01:06:32AM +0200, Ilya Leoshkevich wrote: > test_ksyms_module fails to emit a kfunc call targeting a module on > s390x, because the verifier stores the difference between kfunc > address and __bpf_call_base in bpf_insn.imm, which is s32, and modules > are roughly (1 << 42) bytes away from the kernel on s390x. > > Fix by keeping BTF id in bpf_insn.imm for BPF_PSEUDO_KFUNC_CALLs, > and storing the absolute address in bpf_kfunc_desc. > > Introduce bpf_jit_supports_far_kfunc_call() in order to limit this new > behavior to the s390x JIT. Otherwise other JITs need to be modified, > which is not desired. > > Introduce bpf_get_kfunc_addr() instead of exposing both > find_kfunc_desc() and struct bpf_kfunc_desc. > > In addition to sorting kfuncs by imm, also sort them by offset, in > order to handle conflicting imms from different modules. Do this on > all architectures in order to simplify code. > > Factor out resolving specialized kfuncs (XDP and dynptr) from > fixup_kfunc_call(). This was required in the first place, because > fixup_kfunc_call() uses find_kfunc_desc(), which returns a const > pointer, so it's not possible to modify kfunc addr without stripping > const, which is not nice. It also removes repetition of code like: > > if (bpf_jit_supports_far_kfunc_call()) > desc->addr = func; > else > insn->imm = BPF_CALL_IMM(func); > > and separates kfunc_desc_tab fixups from kfunc_call fixups. > > Suggested-by: Jiri Olsa <olsajiri@xxxxxxxxx> > Signed-off-by: Ilya Leoshkevich <iii@xxxxxxxxxxxxx> Acked-by: Jiri Olsa <jolsa@xxxxxxxxxx> I'll rebase the test kfuncs move on top of this thanks, jirka > --- > > v6: https://lore.kernel.org/bpf/20230405213453.49756-1-iii@xxxxxxxxxxxxx/ > v6 -> v7: - Fix build with !CONFIG_BPF_SYSCALL by providing a dummy > implementation for this case. 
> Reported-by: kernel test robot <lkp@xxxxxxxxx> > Link: https://lore.kernel.org/oe-kbuild-all/202304060822.L9VsdUzS-lkp@xxxxxxxxx/ > - Specialize kfuncs when adding them (Jiri). > - Remove an unnecessary newline (Jiri). > - Add bpf_jit_supports_far_kfunc_call() check to > fixup_kfunc_call() (Jiri). Strictly speaking it's not > necessary (we get the right value in all cases), but it > makes the logic more clear. > > v5: https://lore.kernel.org/bpf/20230405141407.172357-1-iii@xxxxxxxxxxxxx/ > v5 -> v6: Fix build with !CONFIG_BPF_SYSCALL by moving bpf_get_kfunc_addr() > declaration outside of the respective ifdef. > Reported-by: kernel test robot <lkp@xxxxxxxxx> > Link: https://lore.kernel.org/oe-kbuild-all/202304060240.OeUgnjzZ-lkp@xxxxxxxxx/ > Specialize kfunc when adding (Jiri). > Remove an unnecessary newline (Jiri). > Add bpf_jit_supports_far_kfunc_call() check to > fixup_kfunc_call() (Jiri). Strictly speaking it's not > necessary, but it makes the logic less confusing. > > v4: https://lore.kernel.org/bpf/20230403172833.1552354-1-iii@xxxxxxxxxxxxx/ > v4 -> v5: Fix issues identified by Andrii: > - Improve bpf_get_kfunc_addr() argument naming. > - Do overflow check only in !bpf_jit_supports_far_kfunc_call() case. > - Fix kfunc_desc_cmp_by_imm_off() bug when passing huge values. > - Update fixup_kfunc_call() comment to reflect the new logic. > > v3: https://lore.kernel.org/bpf/20230222223714.80671-1-iii@xxxxxxxxxxxxx/ > v3 -> v4: Use Jiri's proposal and make it work on s390x. 
> > arch/s390/net/bpf_jit_comp.c | 5 ++ > include/linux/bpf.h | 10 +++ > include/linux/filter.h | 1 + > kernel/bpf/core.c | 11 ++++ > kernel/bpf/verifier.c | 123 +++++++++++++++++++++++------------ > 5 files changed, 110 insertions(+), 40 deletions(-) > > diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c > index d0846ba818ee..7102e4b674a0 100644 > --- a/arch/s390/net/bpf_jit_comp.c > +++ b/arch/s390/net/bpf_jit_comp.c > @@ -2001,6 +2001,11 @@ bool bpf_jit_supports_kfunc_call(void) > return true; > } > > +bool bpf_jit_supports_far_kfunc_call(void) > +{ > + return true; > +} > + > int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, > void *old_addr, void *new_addr) > { > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 2c6095bd7d69..88845aadc47d 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -2295,6 +2295,9 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); > const struct btf_func_model * > bpf_jit_find_kfunc_model(const struct bpf_prog *prog, > const struct bpf_insn *insn); > +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, > + u16 btf_fd_idx, u8 **func_addr); > + > struct bpf_core_ctx { > struct bpf_verifier_log *log; > const struct btf *btf; > @@ -2545,6 +2548,13 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, > return NULL; > } > > +static inline int > +bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, > + u16 btf_fd_idx, u8 **func_addr) > +{ > + return -ENOTSUPP; > +} > + > static inline bool unprivileged_ebpf_enabled(void) > { > return false; > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 5364b0c52c1d..bbce89937fde 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -920,6 +920,7 @@ void bpf_jit_compile(struct bpf_prog *prog); > bool bpf_jit_needs_zext(void); > bool bpf_jit_supports_subprog_tailcalls(void); > bool bpf_jit_supports_kfunc_call(void); > +bool bpf_jit_supports_far_kfunc_call(void); > bool 
bpf_helper_changes_pkt_data(void *func); > > static inline bool bpf_dump_raw_ok(const struct cred *cred) > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > index b297e9f60ca1..7a75fdfd707e 100644 > --- a/kernel/bpf/core.c > +++ b/kernel/bpf/core.c > @@ -1187,6 +1187,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, > s16 off = insn->off; > s32 imm = insn->imm; > u8 *addr; > + int err; > > *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL; > if (!*func_addr_fixed) { > @@ -1201,6 +1202,11 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, > addr = (u8 *)prog->aux->func[off]->bpf_func; > else > return -EINVAL; > + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && > + bpf_jit_supports_far_kfunc_call()) { > + err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr); > + if (err) > + return err; > } else { > /* Address of a BPF helper call. Since part of the core > * kernel, it's always at a fixed location. __bpf_call_base > @@ -2732,6 +2738,11 @@ bool __weak bpf_jit_supports_kfunc_call(void) > return false; > } > > +bool __weak bpf_jit_supports_far_kfunc_call(void) > +{ > + return false; > +} > + > /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call > * skb_copy_bits(), so provide a weak definition of it for NET-less config. 
> */ > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index d6db6de3e9ea..1514cf878a1d 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -195,6 +195,8 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env); > static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); > static int ref_set_non_owning(struct bpf_verifier_env *env, > struct bpf_reg_state *reg); > +static void specialize_kfunc(struct bpf_verifier_env *env, > + u32 func_id, u16 offset, unsigned long *addr); > > static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) > { > @@ -2374,6 +2376,7 @@ struct bpf_kfunc_desc { > u32 func_id; > s32 imm; > u16 offset; > + unsigned long addr; > }; > > struct bpf_kfunc_btf { > @@ -2383,6 +2386,11 @@ struct bpf_kfunc_btf { > }; > > struct bpf_kfunc_desc_tab { > + /* Sorted by func_id (BTF ID) and offset (fd_array offset) during > + * verification. JITs do lookups by bpf_insn, where func_id may not be > + * available, therefore at the end of verification do_misc_fixups() > + * sorts this by imm and offset. 
> + */ > struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; > u32 nr_descs; > }; > @@ -2423,6 +2431,19 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) > sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); > } > > +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, > + u16 btf_fd_idx, u8 **func_addr) > +{ > + const struct bpf_kfunc_desc *desc; > + > + desc = find_kfunc_desc(prog, func_id, btf_fd_idx); > + if (!desc) > + return -EFAULT; > + > + *func_addr = (u8 *)desc->addr; > + return 0; > +} > + > static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, > s16 offset) > { > @@ -2602,13 +2623,18 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) > func_name); > return -EINVAL; > } > + specialize_kfunc(env, func_id, offset, &addr); > > - call_imm = BPF_CALL_IMM(addr); > - /* Check whether or not the relative offset overflows desc->imm */ > - if ((unsigned long)(s32)call_imm != call_imm) { > - verbose(env, "address of kernel function %s is out of range\n", > - func_name); > - return -EINVAL; > + if (bpf_jit_supports_far_kfunc_call()) { > + call_imm = func_id; > + } else { > + call_imm = BPF_CALL_IMM(addr); > + /* Check whether the relative offset overflows desc->imm */ > + if ((unsigned long)(s32)call_imm != call_imm) { > + verbose(env, "address of kernel function %s is out of range\n", > + func_name); > + return -EINVAL; > + } > } > > if (bpf_dev_bound_kfunc_id(func_id)) { > @@ -2621,6 +2647,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) > desc->func_id = func_id; > desc->imm = call_imm; > desc->offset = offset; > + desc->addr = addr; > err = btf_distill_func_proto(&env->log, desc_btf, > func_proto, func_name, > &desc->func_model); > @@ -2630,19 +2657,19 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) > return err; > } > > -static int kfunc_desc_cmp_by_imm(const void *a, const void *b) > +static int 
kfunc_desc_cmp_by_imm_off(const void *a, const void *b) > { > const struct bpf_kfunc_desc *d0 = a; > const struct bpf_kfunc_desc *d1 = b; > > - if (d0->imm > d1->imm) > - return 1; > - else if (d0->imm < d1->imm) > - return -1; > + if (d0->imm != d1->imm) > + return d0->imm < d1->imm ? -1 : 1; > + if (d0->offset != d1->offset) > + return d0->offset < d1->offset ? -1 : 1; > return 0; > } > > -static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) > +static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog) > { > struct bpf_kfunc_desc_tab *tab; > > @@ -2651,7 +2678,7 @@ static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) > return; > > sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), > - kfunc_desc_cmp_by_imm, NULL); > + kfunc_desc_cmp_by_imm_off, NULL); > } > > bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) > @@ -2665,13 +2692,14 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, > { > const struct bpf_kfunc_desc desc = { > .imm = insn->imm, > + .offset = insn->off, > }; > const struct bpf_kfunc_desc *res; > struct bpf_kfunc_desc_tab *tab; > > tab = prog->aux->kfunc_tab; > res = bsearch(&desc, tab->descs, tab->nr_descs, > - sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm); > + sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off); > > return res ? 
&res->func_model : NULL; > } > @@ -17293,11 +17321,45 @@ static int fixup_call_args(struct bpf_verifier_env *env) > return err; > } > > +/* replace a generic kfunc with a specialized version if necessary */ > +static void specialize_kfunc(struct bpf_verifier_env *env, > + u32 func_id, u16 offset, unsigned long *addr) > +{ > + struct bpf_prog *prog = env->prog; > + bool seen_direct_write; > + void *xdp_kfunc; > + bool is_rdonly; > + > + if (bpf_dev_bound_kfunc_id(func_id)) { > + xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id); > + if (xdp_kfunc) { > + *addr = (unsigned long)xdp_kfunc; > + return; > + } > + /* fallback to default kfunc when not supported by netdev */ > + } > + > + if (offset) > + return; > + > + if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { > + seen_direct_write = env->seen_direct_write; > + is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); > + > + if (is_rdonly) > + *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; > + > + /* restore env->seen_direct_write to its original value, since > + * may_access_direct_pkt_data mutates it > + */ > + env->seen_direct_write = seen_direct_write; > + } > +} > + > static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > struct bpf_insn *insn_buf, int insn_idx, int *cnt) > { > const struct bpf_kfunc_desc *desc; > - void *xdp_kfunc; > > if (!insn->imm) { > verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); > @@ -17306,18 +17368,9 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > > *cnt = 0; > > - if (bpf_dev_bound_kfunc_id(insn->imm)) { > - xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm); > - if (xdp_kfunc) { > - insn->imm = BPF_CALL_IMM(xdp_kfunc); > - return 0; > - } > - > - /* fallback to default kfunc when not supported by netdev */ > - } > - > - /* insn->imm has the btf func_id. Replace it with > - * an address (relative to __bpf_call_base). 
> + /* insn->imm has the btf func_id. Replace it with an offset relative to > + * __bpf_call_base, unless the JIT needs to call functions that are > + * further than 32 bits away (bpf_jit_supports_far_kfunc_call()). > */ > desc = find_kfunc_desc(env->prog, insn->imm, insn->off); > if (!desc) { > @@ -17326,7 +17379,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > return -EFAULT; > } > > - insn->imm = desc->imm; > + if (!bpf_jit_supports_far_kfunc_call()) > + insn->imm = BPF_CALL_IMM(desc->addr); > if (insn->off) > return 0; > if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { > @@ -17351,17 +17405,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { > insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); > *cnt = 1; > - } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { > - bool seen_direct_write = env->seen_direct_write; > - bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); > - > - if (is_rdonly) > - insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly); > - > - /* restore env->seen_direct_write to its original value, since > - * may_access_direct_pkt_data mutates it > - */ > - env->seen_direct_write = seen_direct_write; > } > return 0; > } > @@ -17891,7 +17934,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) > } > } > > - sort_kfunc_descs_by_imm(env->prog); > + sort_kfunc_descs_by_imm_off(env->prog); > > return 0; > } > -- > 2.39.2 >