Re: [PATCH bpf-next v4] bpf: Support 64-bit pointers to kfuncs

On Mon, Apr 3, 2023 at 10:29 AM Ilya Leoshkevich <iii@xxxxxxxxxxxxx> wrote:
>
> test_ksyms_module fails to emit a kfunc call targeting a module on
> s390x, because the verifier stores the difference between kfunc
> address and __bpf_call_base in bpf_insn.imm, which is s32, and modules
> are roughly (1 << 42) bytes away from the kernel on s390x.
>
> Fix by keeping BTF id in bpf_insn.imm for BPF_PSEUDO_KFUNC_CALLs,
> and storing the absolute address in bpf_kfunc_desc.
>
> Introduce bpf_jit_supports_far_kfunc_call() in order to limit this new
> behavior to the s390x JIT. Otherwise other JITs need to be modified,
> which is not desired.
>
> Introduce bpf_get_kfunc_addr() instead of exposing both
> find_kfunc_desc() and struct bpf_kfunc_desc.
>
> In addition to sorting kfuncs by imm, also sort them by offset, in
> order to handle conflicting imms from different modules. Do this on
> all architectures in order to simplify code.
>
> Factor out resolving specialized kfuncs (XDP and dynptr) from
> fixup_kfunc_call(). This was required in the first place, because
> fixup_kfunc_call() uses find_kfunc_desc(), which returns a const
> pointer, so it's not possible to modify kfunc addr without stripping
> const, which is not nice. It also removes repetition of code like:
>
>         if (bpf_jit_supports_far_kfunc_call())
>                 desc->addr = func;
>         else
>                 insn->imm = BPF_CALL_IMM(func);
>
> and separates kfunc_desc_tab fixups from kfunc_call fixups.
>
> Suggested-by: Jiri Olsa <olsajiri@xxxxxxxxx>
> Signed-off-by: Ilya Leoshkevich <iii@xxxxxxxxxxxxx>
> ---
>
> v3: https://lore.kernel.org/bpf/20230222223714.80671-1-iii@xxxxxxxxxxxxx/
> v3 -> v4: Use Jiri's proposal and make it work on s390x.
>
>  arch/s390/net/bpf_jit_comp.c |   5 ++
>  include/linux/bpf.h          |   2 +
>  include/linux/filter.h       |   1 +
>  kernel/bpf/core.c            |  11 ++++
>  kernel/bpf/verifier.c        | 116 +++++++++++++++++++++++++----------
>  5 files changed, 101 insertions(+), 34 deletions(-)
>

[...]

> @@ -2452,6 +2453,11 @@ struct bpf_kfunc_btf {
>  };
>
>  struct bpf_kfunc_desc_tab {
> +       /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
> +        * verification. JITs do lookups by bpf_insn, where func_id may not be
> +        * available, therefore at the end of verification do_misc_fixups()
> +        * sorts this by imm and offset.
> +        */
>         struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
>         u32 nr_descs;
>  };
> @@ -2492,6 +2498,19 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
>                        sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
>  }
>
> +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, u16 offset,

nit: offset is pretty generic; if it's an fd_array index, should we call
it that, e.g. btf_fd_idx or something like that?
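
Something like this, purely to illustrate the naming (btf_fd_idx is just
a strawman name, not taken from the patch):

	int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
			       u16 btf_fd_idx, u8 **func_addr);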

> +                      u8 **func_addr)
> +{
> +       const struct bpf_kfunc_desc *desc;
> +
> +       desc = find_kfunc_desc(prog, func_id, offset);
> +       if (!desc)
> +               return -EFAULT;
> +
> +       *func_addr = (u8 *)desc->addr;
> +       return 0;
> +}
> +
>  static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
>                                          s16 offset)
>  {
> @@ -2672,7 +2691,8 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
>                 return -EINVAL;
>         }
>
> -       call_imm = BPF_CALL_IMM(addr);
> +       call_imm = bpf_jit_supports_far_kfunc_call() ? func_id :
> +                                                      BPF_CALL_IMM(addr);
>         /* Check whether or not the relative offset overflows desc->imm */
>         if ((unsigned long)(s32)call_imm != call_imm) {
>                 verbose(env, "address of kernel function %s is out of range\n",

this check makes no sense and would produce a misleading message when
bpf_jit_supports_far_kfunc_call() is true, so how about making it a
proper if/else and doing the check only when call_imm is a 32-bit
address offset?
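
Something along these lines (untested sketch, reusing the variables
from the surrounding hunk):

	if (bpf_jit_supports_far_kfunc_call()) {
		call_imm = func_id;
	} else {
		call_imm = BPF_CALL_IMM(addr);
		/* Check whether the relative offset overflows desc->imm */
		if ((unsigned long)(s32)call_imm != call_imm) {
			verbose(env, "address of kernel function %s is out of range\n",
				func_name);
			return -EINVAL;
		}
	}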

> @@ -2690,6 +2710,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
>         desc->func_id = func_id;
>         desc->imm = call_imm;
>         desc->offset = offset;
> +       desc->addr = addr;
>         err = btf_distill_func_proto(&env->log, desc_btf,
>                                      func_proto, func_name,
>                                      &desc->func_model);
> @@ -2699,19 +2720,15 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
>         return err;
>  }
>
> -static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
> +static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
>  {
>         const struct bpf_kfunc_desc *d0 = a;
>         const struct bpf_kfunc_desc *d1 = b;
>
> -       if (d0->imm > d1->imm)
> -               return 1;
> -       else if (d0->imm < d1->imm)
> -               return -1;
> -       return 0;
> +       return d0->imm - d1->imm ?: d0->offset - d1->offset;

very succinct, but does it work for all possible inputs? let's see:

$ cat t.c
#include <stdio.h>
#include <limits.h>

int main() {
        int x1 = INT_MAX;
        int x2 = INT_MIN;
        int d1 = x1 - x2;
        int d2 = x2 - x1;

        printf("x1 %d x2 %d d1 %d d2 %d\n", x1, x2, d1, d2);
        return 0;
}
$ cc t.c && ./a.out
x1 2147483647 x2 -2147483648 d1 -1 d2 1

I believe d1 should be positive and d2 should be negative, though,
right? So maybe let's not be too clever here and just do:

if (d0->imm != d1->imm)
    return d0->imm < d1->imm ? -1 : 1;
if (d0->offset != d1->offset)
    return d0->offset < d1->offset ? -1 : 1;
return 0;

Still succinct enough, I think.


>  }
>
> -static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
> +static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
>  {
>         struct bpf_kfunc_desc_tab *tab;
>

[...]

>  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                             struct bpf_insn *insn_buf, int insn_idx, int *cnt)
>  {
>         const struct bpf_kfunc_desc *desc;
> -       void *xdp_kfunc;
>
>         if (!insn->imm) {
>                 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
> @@ -17306,16 +17372,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>
>         *cnt = 0;
>
> -       if (bpf_dev_bound_kfunc_id(insn->imm)) {
> -               xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
> -               if (xdp_kfunc) {
> -                       insn->imm = BPF_CALL_IMM(xdp_kfunc);
> -                       return 0;
> -               }
> -
> -               /* fallback to default kfunc when not supported by netdev */
> -       }
> -
>         /* insn->imm has the btf func_id. Replace it with
>          * an address (relative to __bpf_call_base).

update the comment to mention bpf_jit_supports_far_kfunc_call()?
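
E.g. something like this (wording is only a suggestion):

	/* insn->imm has the btf func_id. Convert it into an address
	 * (relative to __bpf_call_base), unless
	 * bpf_jit_supports_far_kfunc_call() is true, in which case the
	 * JIT looks up the absolute kfunc address via bpf_get_kfunc_addr()
	 * and insn->imm keeps the btf func_id.
	 */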

>          */
> @@ -17326,7 +17382,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                 return -EFAULT;
>         }
>
> -       insn->imm = desc->imm;
> +       if (!bpf_jit_supports_far_kfunc_call())
> +               insn->imm = BPF_CALL_IMM(desc->addr);
>         if (insn->off)
>                 return 0;
>         if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
> @@ -17351,17 +17408,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
>                 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
>                 *cnt = 1;
> -       } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
> -               bool seen_direct_write = env->seen_direct_write;
> -               bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
> -
> -               if (is_rdonly)
> -                       insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
> -
> -               /* restore env->seen_direct_write to its original value, since
> -                * may_access_direct_pkt_data mutates it
> -                */
> -               env->seen_direct_write = seen_direct_write;
>         }
>         return 0;
>  }
> @@ -17384,6 +17430,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>         struct bpf_map *map_ptr;
>         int i, ret, cnt, delta = 0;
>
> +       fixup_kfunc_desc_tab(env);
> +
>         for (i = 0; i < insn_cnt; i++, insn++) {
>                 /* Make divide-by-zero exceptions impossible. */
>                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
> @@ -17891,7 +17939,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>                 }
>         }
>
> -       sort_kfunc_descs_by_imm(env->prog);
> +       sort_kfunc_descs_by_imm_off(env->prog);
>
>         return 0;
>  }
> --
> 2.39.2
>



