On Thu, Feb 15, 2024 at 5:16 AM Leon Hwang <hffilwlqm@xxxxxxxxx> wrote:
>
>
> Here's the diff:

Please always send a diff against bpf-next.
No one remembers your prior patch from months ago.

> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 4065bdcc5b2a4..fc1df6a7d87c9 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -241,6 +241,8 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>  }
>
>  struct jit_context {
> +	int prologue_tail_call_offset;
> +
>  	int cleanup_addr; /* Epilogue code offset */
>
>  	/*
> @@ -250,6 +252,8 @@ struct jit_context {
>  	 */
>  	int tail_call_direct_label;
>  	int tail_call_indirect_label;
> +
> +	bool tail_call_reachable;
>  };
>
>  /* Maximum number of bytes emitted while JITing one eBPF insn */
> @@ -259,7 +263,7 @@ struct jit_context {
>  /* Number of bytes emit_patch() needs to generate instructions */
>  #define X86_PATCH_SIZE		5
>  /* Number of bytes that will be skipped on tailcall */
> -#define X86_TAIL_CALL_OFFSET	(22 + ENDBR_INSN_SIZE)
> +#define X86_TAIL_CALL_OFFSET	(14 + ENDBR_INSN_SIZE)
>
>  static void push_r12(u8 **pprog)
>  {
> @@ -389,6 +393,19 @@ static void emit_cfi(u8 **pprog, u32 hash)
>  	*pprog = prog;
>  }
>
> +DEFINE_PER_CPU(u32, bpf_tail_call_cnt);
> +
> +__attribute__((used))
> +static u32 *bpf_tail_call_cnt_prepare(void)
> +{
> +	u32 *tcc_ptr = this_cpu_ptr(&bpf_tail_call_cnt);
> +
> +	/* Initialise tail_call_cnt. */
> +	*tcc_ptr = 0;
> +
> +	return tcc_ptr;
> +}

This might need to be in asm to make sure no callee-saved registers are
touched. In general that's better, but it feels we can do better and
avoid passing rax around.
Just access bpf_tail_call_cnt directly from emit_bpf_tail_call.
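
Something along these lines, maybe (completely untested sketch, not a
patch: the emit_tcc_* helper names are made up, and it assumes the
existing EMIT*()/X86_JAE helpers in bpf_jit_comp.c plus the usual x86-64
property that a per-CPU variable's offset fits in a 32-bit %gs-relative
displacement, the same trick used for this_cpu_off style accesses):

/*
 * Sketch: bound-check and bump the per-CPU bpf_tail_call_cnt directly
 * with %gs-relative memory operands, emitted from the tail-call paths
 * instead of carrying a tcc pointer in rax.
 */
static void emit_tcc_check_and_inc(u8 **pprog, int out_offset)
{
	u8 *prog = *pprog;
	u32 tcc_off = (u32)(unsigned long)&bpf_tail_call_cnt;

	/* cmpl $MAX_TAIL_CALL_CNT, %gs:bpf_tail_call_cnt */
	EMIT2(0x65, 0x81);	/* gs prefix; cmp r/m32, imm32 (/7) */
	EMIT2(0x3C, 0x25);	/* modrm /7 + SIB: disp32, no base/index */
	EMIT(tcc_off, 4);
	EMIT(MAX_TAIL_CALL_CNT, 4);

	/* jae out */
	EMIT2(X86_JAE, out_offset);

	/* incl %gs:bpf_tail_call_cnt */
	EMIT2(0x65, 0xFF);	/* gs prefix; inc r/m32 (/0) */
	EMIT2(0x04, 0x25);
	EMIT(tcc_off, 4);

	*pprog = prog;
}

/* Sketch: prologue reset, replacing the bpf_tail_call_cnt_prepare() call. */
static void emit_tcc_reset(u8 **pprog)
{
	u8 *prog = *pprog;
	u32 tcc_off = (u32)(unsigned long)&bpf_tail_call_cnt;

	/* movl $0, %gs:bpf_tail_call_cnt */
	EMIT2(0x65, 0xC7);	/* gs prefix; mov r/m32, imm32 (/0) */
	EMIT2(0x04, 0x25);
	EMIT(tcc_off, 4);
	EMIT(0, 4);

	*pprog = prog;
}

Since nothing is called here, no registers are touched at all, so the
callee-saved/asm concern goes away together with the rax bookkeeping.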