On Mon, Nov 20, 2023 at 03:46:44PM +0100, Peter Zijlstra wrote:
> +
> +#ifdef CONFIG_CFI_CLANG
> +struct bpf_insn;
> +
> +extern unsigned int bpf_func_proto(const void *ctx,
> +				    const struct bpf_insn *insn);

To make it more obvious what is going on, could you rename it to
__bpf_prog_runX() and add a comment that its prototype must exactly match
the bpf interpreters created by the DEFINE_BPF_PROG_RUN() macro,
otherwise cfi will explode.

> +
> +__ADDRESSABLE(bpf_func_proto);
> +
> +asm (
> +" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
> +" .type cfi_bpf_hash,@object \n"
> +" .globl cfi_bpf_hash \n"
> +" .p2align 2, 0x0 \n"
> +"cfi_bpf_hash: \n"
> +" .long __kcfi_typeid_bpf_func_proto \n"

Took me some time to grok this. Can't you use the __CFI_TYPE() macro here?

> +" .size cfi_bpf_hash, 4 \n"
> +" .popsection \n"
> +);
> +#endif

...

> +static int emit_fineibt(u8 **pprog)
> +{
> +	u8 *prog = *pprog;
> +
> +	EMIT_ENDBR();
> +	EMIT3_off32(0x41, 0x81, 0xea, cfi_bpf_hash);
> +	EMIT2(0x74, 0x07);
> +	EMIT2(0x0f, 0x0b);
> +	EMIT1(0x90);
> +	EMIT_ENDBR_POISON();

Please add comments what this asm does. No one can read hex.
(My own decoding attempt is further down in this mail.)

> +
> +	*pprog = prog;
> +	return 16;

16 means "the caller of this code will jump to endbr_poison", right?

> +}
> +
> +static int emit_kcfi(u8 **pprog)
> +{
> +	u8 *prog = *pprog;
> +	int offset = 5;
> +
> +	EMIT1_off32(0xb8, cfi_bpf_hash);

and here too.

> +#ifdef CONFIG_CALL_PADDING
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	EMIT1(0x90);
> +	offset += 11;
> +#endif
> +	EMIT_ENDBR();
> +
> +	*pprog = prog;
> +	return offset;

5 or 16 would mean "jump to endbr"?

> +}
> +
> +static int emit_cfi(u8 **pprog)
> +{
> +	u8 *prog = *pprog;
> +	int offset = 0;
> +
> +	switch (cfi_mode) {
> +	case CFI_FINEIBT:
> +		offset = emit_fineibt(&prog);
> +		break;
> +
> +	case CFI_KCFI:
> +		offset = emit_kcfi(&prog);
> +		break;
> +
> +	default:
> +		EMIT_ENDBR();
> +		break;
> +	}
> +
> +	*pprog = prog;
> +	return offset;
> +}
> +
>  /*
>   * Emit x86-64 prologue code for BPF program.
>   * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
>   * while jumping to another program
>   */
> -static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
> -			  bool tail_call_reachable, bool is_subprog,
> -			  bool is_exception_cb)
> +static int emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
> +			 bool tail_call_reachable, bool is_subprog,
> +			 bool is_exception_cb)
>  {
>  	u8 *prog = *pprog;
> +	int offset;
>
> +	offset = emit_cfi(&prog);

I'm not sure doing the cfi_bpf_hash check in JITed code completely solves
the problem. Calling from bpf_dispatcher_*_func() into JITed code will
work, but this emit_prologue() is doing the same job for all bpf progs.
Some bpf progs call each other directly and indirectly:
bpf_dispatcher_*_func() -> JITed_BPF_A -> JITed_BPF_B. A into B can be a
direct call (which cfi doesn't care about) or indirect via
emit_bpf_tail_call_indirect() -> emit_indirect_jump(). Should we care
about fineibt/kcfi there too?
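Also, to make the "no one can read hex" point concrete, here is how I
decode those emitted bytes. The mnemonics below are my own reading, not
anything the patch states, so please double check before turning them
into real comments:

	/* FineIBT preamble; the hash check is 16 bytes, then the poisoned endbr */
	EMIT_ENDBR();					/* endbr64			*/
	EMIT3_off32(0x41, 0x81, 0xea, cfi_bpf_hash);	/* subl $hash, %r10d		*/
	EMIT2(0x74, 0x07);				/* je +7: on a hash match skip
							 * the ud2, the nop and the
							 * poisoned endbr		*/
	EMIT2(0x0f, 0x0b);				/* ud2 on hash mismatch		*/
	EMIT1(0x90);					/* nop				*/
	EMIT_ENDBR_POISON();				/* bogus endbr at offset 16;
							 * direct jumps land here	*/

	/* kCFI preamble: just the type hash that the caller's cmpl checks */
	EMIT1_off32(0xb8, cfi_bpf_hash);		/* movl $hash, %eax		*/

If that decoding is right, then the returned 16/5 is where direct jumps
should land, and the indirect caller is expected to enter 16 bytes
earlier in the fineibt case (the fineibt_caller sequence in
arch/x86/kernel/alternative.c does a sub $16, %r11 before the call, if I
read that code correctly), while kcfi callers read the hash at a negative
offset from the pointer and call the pointer itself.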
>  	/* BPF trampoline can be made to work without these nops,
>  	 * but let's waste 5 bytes for now and optimize later
>  	 */
> -	EMIT_ENDBR();
>  	memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
>  	prog += X86_PATCH_SIZE;
>  	if (!ebpf_from_cbpf) {
> @@ -357,6 +426,8 @@ static void emit_prologue(u8 **pprog, u3
>  	if (tail_call_reachable)
>  		EMIT1(0x50); /* push rax */
>  	*pprog = prog;
> +
> +	return offset;
>  }
>
>  static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
> @@ -1083,8 +1154,8 @@ static int do_jit(struct bpf_prog *bpf_p
>  	bool tail_call_seen = false;
>  	bool seen_exit = false;
>  	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
> -	int i, excnt = 0;
>  	int ilen, proglen = 0;
> +	int i, excnt = 0;
>  	u8 *prog = temp;
>  	int err;
>
> @@ -1094,9 +1165,12 @@ static int do_jit(struct bpf_prog *bpf_p
>  	/* tail call's presence in current prog implies it is reachable */
>  	tail_call_reachable |= tail_call_seen;
>
> -	emit_prologue(&prog, bpf_prog->aux->stack_depth,
> -		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
> -		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
> +	ctx->prog_offset = emit_prologue(&prog, bpf_prog->aux->stack_depth,
> +					 bpf_prog_was_classic(bpf_prog),
> +					 tail_call_reachable,
> +					 bpf_is_subprog(bpf_prog),
> +					 bpf_prog->aux->exception_cb);
> +
>  	/* Exception callback will clobber callee regs for its own use, and
>  	 * restore the original callee regs from main prog's stack frame.
>  	 */
> @@ -2935,9 +3009,9 @@ struct bpf_prog *bpf_int_jit_compile(str
>  		jit_data->header = header;
>  		jit_data->rw_header = rw_header;
>  	}
> -	prog->bpf_func = (void *)image;
> +	prog->bpf_func = (void *)image + ctx.prog_offset;

I don't understand this. prog->bpf_func is the main entry point;
everything jumps there. Are you trying to skip all of the cfi code in the
prologue and let xdp_dispatcher jump to endbr or endbr_poison (depending
on fineibt vs kcfi)? Then what is the point of the earlier asm bits? Is
it some clang thing that knows to offset the indirect jump by exactly
that many hard-coded bytes? Does something in clang do ptr -= 16 in the
fineibt case and just jump there, and ptr -= 5 for kcfi? If so, please
add a giant comment explaining that. No one should be reverse engineering
such intricate details.

>  	prog->jited = 1;
> -	prog->jited_len = proglen;
> +	prog->jited_len = proglen - ctx.prog_offset; // XXX?

jited_len is used later to cover the whole generated code. See
bpf_prog_ksym_set_addr():

	prog->aux->ksym.start = (unsigned long) prog->bpf_func;
	prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len;

We definitely want ksym [start, end] to cover every useful byte of JITed
code in case an IRQ happens on one of those bytes. Without covering the
cfi prologue the stack dump will be wrong for that frame. I guess if
xdp_dispatcher with fineibt=on jumps into prog->bpf_func - 16 and an IRQ
fires, we don't care that much about an accurate stack for the last
frame? I guess it's acceptable, but a comment is necessary.
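If we do want the ksym to cover the cfi bytes as well, here is a sketch
of what I mean; the cfi_offset field below is made up for illustration,
nothing in this patch stores the emit_cfi() return value in prog->aux:

	/* In bpf_prog_ksym_set_addr(), assuming the JIT stashed the size of
	 * the cfi preamble (what emit_cfi() returned) in a new, hypothetical
	 * prog->aux->cfi_offset field:
	 */
	prog->aux->ksym.start = (unsigned long)prog->bpf_func - prog->aux->cfi_offset;
	prog->aux->ksym.end   = (unsigned long)prog->bpf_func + prog->jited_len;

Then jited_len could stay as proglen - ctx.prog_offset and an IRQ landing
on the preamble bytes would still resolve to this prog's ksym.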