On Thu, Sep 30, 2021 at 11:59:47AM +0530, Kumar Kartikeya Dwivedi wrote:
> This change updates the BPF syscall loader to relocate BTF_KIND_FUNC
> relocations, with support for weak kfunc relocations. The general idea
> is to move map_fds to loader map, and also use the data for storing
> kfunc BTF fds. Since both reuse the fd_array parameter, they need to be
> kept together.
>
> For map_fds, we reserve MAX_USED_MAPS slots in a region, and for kfunc,
> we reserve MAX_KFUNC_DESCS. This is done so that insn->off has more
> chances of being <= INT16_MAX than treating data map as a sparse array
> and adding fd as needed.
>
> When the MAX_KFUNC_DESCS limit is reached, we fall back to the sparse
> array model, so that as long as it does remain <= INT16_MAX, we pass an
> index relative to the start of fd_array.
>
> We store all ksyms in an array where we try to avoid calling the
> bpf_btf_find_by_name_kind helper, and also reuse the BTF fd that was
> already stored. This also speeds up the loading process compared to
> emitting calls in all cases, in later tests.
>
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
> ---
>  tools/lib/bpf/bpf_gen_internal.h |  16 +-
>  tools/lib/bpf/gen_loader.c       | 323 ++++++++++++++++++++++++++-----
>  tools/lib/bpf/libbpf.c           |   8 +-
>  3 files changed, 292 insertions(+), 55 deletions(-)
>
> diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
> index 615400391e57..70eccbffefb1 100644
> --- a/tools/lib/bpf/bpf_gen_internal.h
> +++ b/tools/lib/bpf/bpf_gen_internal.h
> @@ -7,6 +7,15 @@ struct ksym_relo_desc {
>  	const char *name;
>  	int kind;
>  	int insn_idx;
> +	bool is_weak;
> +};
> +
> +struct ksym_desc {
> +	const char *name;
> +	int ref;
> +	int kind;
> +	int off;
> +	int insn;
>  };
>
>  struct bpf_gen {
> @@ -24,6 +33,10 @@ struct bpf_gen {
>  	int relo_cnt;
>  	char attach_target[128];
>  	int attach_kind;
> +	struct ksym_desc *ksyms;
> +	__u32 nr_ksyms;
> +	int fd_array;
> +	int nr_fd_array;
>  };
>
>  void bpf_gen__init(struct bpf_gen *gen, int log_level);
> @@ -36,6 +49,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_a
>  void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
>  void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
>  void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
> -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx);
> +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, int kind,
> +			    int insn_idx);
>
>  #endif
> diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
> index 80087b13877f..9902f2f47fb2 100644
> --- a/tools/lib/bpf/gen_loader.c
> +++ b/tools/lib/bpf/gen_loader.c
> @@ -14,8 +14,10 @@
>  #include "bpf_gen_internal.h"
>  #include "skel_internal.h"
>
> -#define MAX_USED_MAPS 64
> -#define MAX_USED_PROGS 32
> +#define MAX_USED_MAPS	64
> +#define MAX_USED_PROGS	32
> +#define MAX_KFUNC_DESCS 256
> +#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS)
>
>  /* The following structure describes the stack layout of the loader program.
>   * In addition R6 contains the pointer to context.
> @@ -30,7 +32,6 @@
>   */
>  struct loader_stack {
>  	__u32 btf_fd;
> -	__u32 map_fd[MAX_USED_MAPS];
>  	__u32 prog_fd[MAX_USED_PROGS];
>  	__u32 inner_map_fd;
>  };
> @@ -143,13 +144,58 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
>  	if (realloc_data_buf(gen, size8))
>  		return 0;
>  	prev = gen->data_cur;
> -	memcpy(gen->data_cur, data, size);
> +	if (data)
> +		memcpy(gen->data_cur, data, size);

The man page says that realloc doesn't initialize the memory. Let's zero it
here instead of leaving it uninitialized.

>  	gen->data_cur += size;
>  	memcpy(gen->data_cur, &zero, size8 - size);
>  	gen->data_cur += size8 - size;
>  	return prev - gen->data_start;
>  }
>
> +/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative
> + * to start of fd_array. Caller can decide if it is usable or not.
> + */
> +static int fd_array_init(struct bpf_gen *gen)
> +{
> +	if (!gen->fd_array)
> +		gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
> +	return gen->fd_array;
> +}
> +
> +static int add_map_fd(struct bpf_gen *gen)
> +{
> +	if (!fd_array_init(gen))
> +		return -1;

Please drop the error check here and in the other places. In an out-of-memory
situation gen->error will be set and all subsequent ops will use zero.
The final check is in finish().

> +	if (gen->nr_maps == MAX_USED_MAPS) {
> +		pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
> +		gen->error = -E2BIG;
> +		return -1;
> +	}
> +	return gen->nr_maps++;
> +}
> +
> +static int add_kfunc_btf_fd(struct bpf_gen *gen)
> +{
> +	int cur;
> +
> +	if (!fd_array_init(gen))
> +		return -1;

drop it.

> +	if (gen->nr_fd_array == MAX_KFUNC_DESCS) {
> +		cur = add_data(gen, NULL, sizeof(int));
> +		if (!cur)
> +			return -1;

drop it.

> +		return (cur - gen->fd_array) / sizeof(int);
> +	}
> +	return MAX_USED_MAPS + gen->nr_fd_array++;
> +}
> +
> +static int blob_fd_array_off(struct bpf_gen *gen, int index)
> +{
> +	if (!gen->fd_array)
> +		return 0;

drop it.
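To make the suggestion concrete, here is a minimal userspace sketch of that
error-latching pattern (hypothetical gen_ctx/ctx_* names, not the real bpf_gen
API): the first allocation failure latches the error, later calls degrade to
no-ops that return offset 0, and only the final finish() reports it. The sketch
also zeroes the reserved region instead of leaving the realloc'd bytes
uninitialized.

#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct gen_ctx {
	char *buf;
	size_t used, cap;
	int err;
};

int ctx_add_data(struct gen_ctx *g, const void *data, size_t size)
{
	if (g->err)
		return 0;
	if (g->used + size > g->cap) {
		char *p = realloc(g->buf, g->used + size);

		if (!p) {
			g->err = -ENOMEM;	/* latch the first error */
			return 0;		/* callers need no extra checks */
		}
		g->buf = p;
		g->cap = g->used + size;
	}
	if (data)
		memcpy(g->buf + g->used, data, size);
	else
		memset(g->buf + g->used, 0, size);	/* zero the reserved slot */
	g->used += size;
	/* offset 0 doubles as the "nothing was added" value, as in the patch */
	return (int)(g->used - size);
}

int ctx_finish(struct gen_ctx *g)
{
	return g->err;	/* single final check */
}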
> +	return gen->fd_array + (index * sizeof(int));

no need for extra ()

> +}
> +
>  static int insn_bytes_to_bpf_size(__u32 sz)
>  {
>  	switch (sz) {
> @@ -171,14 +217,22 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data)
>  	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
>  }
>
> -/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */
> -static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off)
> +static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off)
>  {
> -	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10));
> -	emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off));
> +	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
> +					 0, 0, 0, blob_off));
> +	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0));
>  	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
>  					 0, 0, 0, off));
> -	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
> +	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
> +}
> +
> +static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off)
> +{
> +	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
> +					 0, 0, 0, blob_off));
> +	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0));
> +	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
>  }
>
>  static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
> @@ -326,11 +380,11 @@ int bpf_gen__finish(struct bpf_gen *gen)
>  			       offsetof(struct bpf_prog_desc, prog_fd), 4,
>  			       stack_off(prog_fd[i]));
>  	for (i = 0; i < gen->nr_maps; i++)
> -		move_stack2ctx(gen,
> -			       sizeof(struct bpf_loader_ctx) +
> -			       sizeof(struct bpf_map_desc) * i +
> -			       offsetof(struct bpf_map_desc, map_fd), 4,
> -			       stack_off(map_fd[i]));
> +		move_blob2ctx(gen,
> +			      sizeof(struct bpf_loader_ctx) +
> +			      sizeof(struct bpf_map_desc) * i +
> +			      offsetof(struct bpf_map_desc, map_fd), 4,
> +			      blob_fd_array_off(gen, i));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
>  	emit(gen, BPF_EXIT_INSN());
>  	pr_debug("gen: finish %d\n", gen->error);
> @@ -390,7 +444,7 @@ void bpf_gen__map_create(struct bpf_gen *gen,
>  {
>  	int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
>  	bool close_inner_map_fd = false;
> -	int map_create_attr;
> +	int map_create_attr, idx;
>  	union bpf_attr attr;
>
>  	memset(&attr, 0, attr_size);
> @@ -467,9 +521,13 @@ void bpf_gen__map_create(struct bpf_gen *gen,
>  		gen->error = -EDOM; /* internal bug */
>  		return;
>  	} else {
> -		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
> -				      stack_off(map_fd[map_idx])));
> -		gen->nr_maps++;
> +		/* add_map_fd does gen->nr_maps++ */
> +		idx = add_map_fd(gen);
> +		if (idx < 0)
> +			return;

drop it.
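To restate the fd_array layout from the commit message as a standalone sketch
(illustrative helper names, not the libbpf internals): map fds occupy the first
MAX_USED_MAPS indices, kfunc BTF fds the next MAX_KFUNC_DESCS, and once that
region is exhausted a new slot is added behind it; the byte offset in the data
blob is just the fd_array base plus index * sizeof(int).

#include <stdio.h>

#define MAX_USED_MAPS   64
#define MAX_KFUNC_DESCS 256

static int next_map, next_kfunc, next_sparse = MAX_USED_MAPS + MAX_KFUNC_DESCS;

/* map fds take indices [0, 64) */
static int map_fd_index(void)
{
	return next_map++;
}

/* kfunc BTF fds take indices [64, 320); past MAX_KFUNC_DESCS descriptors the
 * index falls back to a freshly added slot behind the reserved region and
 * must stay <= INT16_MAX because it is stored in insn->off
 */
static int kfunc_btf_fd_index(void)
{
	if (next_kfunc < MAX_KFUNC_DESCS)
		return MAX_USED_MAPS + next_kfunc++;
	return next_sparse++;
}

/* byte offset of a slot inside the data blob, given where fd_array starts */
static int fd_array_byte_off(int fd_array_base, int index)
{
	return fd_array_base + index * (int)sizeof(int);
}

int main(void)
{
	printf("map slot %d, kfunc slot %d, blob off %d\n",
	       map_fd_index(), kfunc_btf_fd_index(),
	       fd_array_byte_off(0, MAX_USED_MAPS));
	return 0;
}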
> +		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
> +						 0, 0, 0, blob_fd_array_off(gen, idx)));
> +		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0));
>  	}
>  	if (close_inner_map_fd)
>  		emit_sys_close_stack(gen, stack_off(inner_map_fd));
> @@ -511,8 +569,8 @@ static void emit_find_attach_target(struct bpf_gen *gen)
>  	 */
>  }
>
> -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
> -			    int insn_idx)
> +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
> +			    int kind, int insn_idx)
>  {
>  	struct ksym_relo_desc *relo;
>
> @@ -524,38 +582,196 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
>  	gen->relos = relo;
>  	relo += gen->relo_cnt;
>  	relo->name = name;
> +	relo->is_weak = is_weak;
>  	relo->kind = kind;
>  	relo->insn_idx = insn_idx;
>  	gen->relo_cnt++;
>  }
>
> -static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
> +/* returns existing ksym_desc with ref incremented, or inserts a new one */
> +static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)
>  {
> -	int name, insn, len = strlen(relo->name) + 1;
> +	struct ksym_desc *kdesc;
>
> -	pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx);
> -	name = add_data(gen, relo->name, len);
> +	for (int i = 0; i < gen->nr_ksyms; i++) {
> +		if (!strcmp(gen->ksyms[i].name, relo->name)) {
> +			gen->ksyms[i].ref++;
> +			return &gen->ksyms[i];
> +		}
> +	}
> +	kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc));
> +	if (!kdesc) {
> +		gen->error = -ENOMEM;
> +		return NULL;
> +	}
> +	gen->ksyms = kdesc;
> +	kdesc = &gen->ksyms[gen->nr_ksyms++];
> +	kdesc->name = relo->name;
> +	kdesc->kind = relo->kind;
> +	kdesc->ref = 1;
> +	kdesc->off = 0;
> +	kdesc->insn = 0;
> +	return kdesc;
> +}
> +
> +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7}
> + * Returns result in BPF_REG_7
> + */
> +static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo)
> +{
> +	int name_off, len = strlen(relo->name) + 1;
>
> +	name_off = add_data(gen, relo->name, len);
> +	if (!name_off)
> +		return;

drop it.

>  	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
> -					 0, 0, 0, name));
> +					 0, 0, 0, name_off));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
>  	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
>  	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
>  	debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
> -	emit_check_err(gen);
> +}
> +
> +/* Expects:
> + * BPF_REG_8 - pointer to instruction
> + *
> + * We need to reuse BTF fd for same symbol otherwise each relocation takes a new
> + * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols,
> + * this would mean a new BTF fd index for each entry. By pairing symbol name
> + * with index, we get the insn->imm, insn->off pairing that kernel uses for
> + * kfunc_tab, which becomes the effective limit even though all of them may
> + * share same index in fd_array (such that kfunc_btf_tab has 1 element).
> + */
> +static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
> +{
> +	struct ksym_desc *kdesc;
> +	int btf_fd_idx;
> +
> +	kdesc = get_ksym_desc(gen, relo);
> +	if (!kdesc)
> +		return;
> +	/* try to copy from existing bpf_insn */
> +	if (kdesc->ref > 1) {
> +		move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
> +			       kdesc->insn + offsetof(struct bpf_insn, imm));
> +		move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2,
> +			       kdesc->insn + offsetof(struct bpf_insn, off));
> +		goto log;
> +	}
> +	/* remember insn offset, so we can copy BTF ID and FD later */
> +	kdesc->insn = insn;
> +	emit_bpf_find_by_name_kind(gen, relo);
> +	if (!relo->is_weak)
> +		emit_check_err(gen);
> +	/* get index in fd_array to store BTF FD at */
> +	btf_fd_idx = add_kfunc_btf_fd(gen);
> +	if (btf_fd_idx < 0)
> +		return;

drop it.

> +	if (btf_fd_idx > INT16_MAX) {
> +		pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n",
> +			btf_fd_idx, relo->name);
> +		gen->error = -E2BIG;
> +		return;

This one is necessary. Keep it.
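As background for why the caching above matters: for a kfunc call instruction
the kernel consumes insn->imm as the kfunc's BTF type ID and insn->off as the
index into fd_array that identifies the owning BTF fd (0 meaning vmlinux BTF).
A rough userspace sketch of the reuse logic (illustrative names; the patch does
the same thing via emitted loader instructions at load time):

#include <stdint.h>

struct insn_patch {
	int32_t imm;	/* BTF type ID of the kfunc */
	int16_t off;	/* fd_array index of the BTF fd (0 = vmlinux) */
};

struct ksym_cache {
	const char *name;
	int ref;
	struct insn_patch patch;
};

/* The first relocation of a symbol resolves it (find_by_name_kind plus an
 * fd_array slot); every later relocation of the same symbol just copies the
 * cached pair, so duplicates do not consume additional fd_array slots.
 */
void patch_kfunc_call(struct ksym_cache *k, struct insn_patch *insn,
		      struct insn_patch (*resolve)(const char *name))
{
	if (k->ref++ == 0)
		k->patch = resolve(k->name);
	*insn = k->patch;
}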