On Mon, Sep 27, 2021 at 08:29:38PM +0530, Kumar Kartikeya Dwivedi wrote:
> +static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
> +{
> +	struct kfunc_desc *kdesc;
> +	int btf_fd_idx;
> +
> +	kdesc = get_kfunc_desc(gen, relo->name);
> +	if (!kdesc)
> +		return;
> +
> +	btf_fd_idx = kdesc->ref > 1 ? kdesc->off : add_kfunc_btf_fd(gen);
> +	if (btf_fd_idx > INT16_MAX) {
> +		pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n",
> +			btf_fd_idx, relo->name);
> +		gen->error = -E2BIG;
> +		return;
> +	}
> +	/* load slot pointer */
> +	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE,
> +					 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
> +	/* Try to map one insn->off to one insn->imm */
> +	if (kdesc->ref > 1) {
> +		emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
> +		goto skip_btf_fd;
> +	} else {
> +		/* cannot use index == 0 */
> +		if (!btf_fd_idx) {
> +			btf_fd_idx = add_kfunc_btf_fd(gen);
> +			/* shift to next slot */
> +			emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, sizeof(int)));
> +		}
> +		kdesc->off = btf_fd_idx;
> +	}
> +
> +	/* set a default value */
> +	emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, 0, 0));
> +	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
> +	/* skip storing BTF fd if ret < 0 */
> +	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3));
> +	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
> +	/* store BTF fd in slot */
> +	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, 0));
> +	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_9));
> +skip_btf_fd:
> +	/* remember BTF fd to skip insn->off store for vmlinux case */
> +	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
> +	/* set a default value */
> +	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_0, offsetof(struct bpf_insn, off), 0));
> +	/* skip insn->off store if ret < 0 */
> +	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2));
> +	/* skip if vmlinux BTF */
> +	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1));
> +	/* store index into insn[insn_idx].off */
> +	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_0, offsetof(struct bpf_insn, off), btf_fd_idx));
> +	/* close fd that we skipped storing in fd_array */
> +	if (kdesc->ref > 1) {
> +		emit(gen, BPF_MOV64_REG(BPF_REG_1, BPF_REG_9));
> +		__emit_sys_close(gen);
> +	}

The kdesc->ref optimization is neat, but I wonder why it's only done halfway. The generated code still calls bpf_btf_find_by_name_kind(), so the kernel allocates a new FD, and then the loader prog simply ignores that FD and closes it. Maybe don't call the helper at all and just copy the imm+off pair from the already populated insn? Something like the sketch at the end of this mail.

I was thinking of doing this optimization for the other cases in emit_relo(), but didn't have time to do it. Since the code is doing this optimization anyway, I think it's worth doing it cleanly. Or just not doing it at all.

It's great that there is a test for it in patch 12 :)
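To make that concrete, here is a rough, untested sketch of the ref > 1 shortcut. It assumes struct kfunc_desc grows a hypothetical insn field that records the blob offset of the first instruction relocated for this kfunc, and it reuses the existing move_blob2blob() helper from gen_loader.c:

/* rough sketch, untested: short-circuit repeated relocations of the
 * same kfunc instead of redoing the helper call
 */
static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
{
	struct kfunc_desc *kdesc;

	kdesc = get_kfunc_desc(gen, relo->name);
	if (!kdesc)
		return;

	if (kdesc->ref > 1) {
		/* Copy imm (BTF ID) and off (fd_array index) from the
		 * insn that was already relocated for this kfunc, so
		 * bpf_btf_find_by_name_kind() is never called again and
		 * no extra FD is allocated, ignored, and closed.
		 */
		move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
			       kdesc->insn + offsetof(struct bpf_insn, imm));
		move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2,
			       kdesc->insn + offsetof(struct bpf_insn, off));
		return;
	}
	/* first reference: remember where the result lands, then fall
	 * through to the existing find_by_name_kind + fd_array store
	 * sequence from the patch above
	 */
	kdesc->insn = insn;
	/* ... rest of the emit sequence as in the patch ... */
}

That would also let the sys_close() at the end of the ref > 1 path go away, since no redundant FD is created in the first place.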