On Wed, Feb 14, 2024 at 01:47:45PM +0800, Leon Hwang wrote:
> 
> 
> On 2024/1/5 12:15, Alexei Starovoitov wrote:
> > On Thu, Jan 4, 2024 at 6:23 AM Leon Hwang <hffilwlqm@xxxxxxxxx> wrote:
> >>
> >>
> > 
> > Other alternatives?
> 
> I've finished the POC of an alternative, which passes all tailcall
> selftests, including these tailcall hierarchy ones.
> 
> In this alternative, I use a new bpf_prog_run_ctx to wrap the original
> ctx and the tcc_ptr, then get the tcc_ptr and recover the original ctx
> in the JIT.
> 
> Then, to avoid breaking the tailcall runtime on other arches, I add an
> arch-specific check, bpf_jit_supports_tail_call_cnt_ptr(), to determine
> whether to use bpf_prog_run_ctx.
> 
> Here's the diff:

This is a diff against your previously proposed solution. It would be
good to see how it all looks put together (this diff applied on top of
your patch); that would save us the effort of digging up the patch and
folding the diff in. I have also put a small sketch of how I read the
new calling convention right after the diff, to check my understanding.

> 
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 4065bdcc5b2a4..56cea2676863e 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -259,7 +259,7 @@ struct jit_context {
>  /* Number of bytes emit_patch() needs to generate instructions */
>  #define X86_PATCH_SIZE          5
>  /* Number of bytes that will be skipped on tailcall */
> -#define X86_TAIL_CALL_OFFSET    (22 + ENDBR_INSN_SIZE)
> +#define X86_TAIL_CALL_OFFSET    (16 + ENDBR_INSN_SIZE)
> 
>  static void push_r12(u8 **pprog)
>  {
> @@ -407,21 +407,19 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>          emit_nops(&prog, X86_PATCH_SIZE);
>          if (!ebpf_from_cbpf) {
>                  if (tail_call_reachable && !is_subprog) {
> -                        /* When it's the entry of the whole tailcall context,
> -                         * zeroing rax means initialising tail_call_cnt.
> -                         */
> -                        EMIT2(0x31, 0xC0); /* xor eax, eax */
> -                        EMIT1(0x50);       /* push rax */
> -                        /* Make rax as ptr that points to tail_call_cnt. */
> -                        EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */
> -                        EMIT1_off32(0xE8, 2);    /* call main prog */
> -                        EMIT1(0x59);             /* pop rcx, get rid of tail_call_cnt */
> -                        EMIT1(0xC3);             /* ret */
> +                        /* Make rax as tcc_ptr. */
> +                        EMIT4(0x48, 0x8B, 0x47, 0x08); /* mov rax, qword ptr [rdi + 8] */
>                  } else {
> -                        /* Keep the same instruction size. */
> -                        emit_nops(&prog, 13);
> +                        /* Keep the same instruction layout. */
> +                        emit_nops(&prog, 4);
>                  }
>          }
> +        if (!is_subprog)
> +                /* Recover the original ctx. */
> +                EMIT3(0x48, 0x8B, 0x3F); /* mov rdi, qword ptr [rdi] */
> +        else
> +                /* Keep the same instruction layout. */
> +                emit_nops(&prog, 3);
>          /* Exception callback receives FP as third parameter */
>          if (is_exception_cb) {
>                  EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
> @@ -3152,6 +3150,12 @@ bool bpf_jit_supports_subprog_tailcalls(void)
>          return true;
>  }
> 
> +/* Indicate the JIT backend supports tail call count pointer in
> + * tailcall context.
> + */
> +bool bpf_jit_supports_tail_call_cnt_ptr(void)
> +{
> +        return true;
> +}
> +
>  void bpf_jit_free(struct bpf_prog *prog)
>  {
>          if (prog->jited) {
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 7671530d6e4e0..fea4326c27d31 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1919,6 +1919,11 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
>                          u64 bpf_cookie,
>                          struct bpf_prog_array **new_array);
> 
> +struct bpf_prog_run_ctx {
> +        const void *ctx;
> +        u32 *tail_call_cnt;
> +};
> +
>  struct bpf_run_ctx {};
> 
>  struct bpf_cg_run_ctx {
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 68fb6c8142fec..c1c035c44b4ab 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -629,6 +629,10 @@ typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
>                                            unsigned int (*bpf_func)(const void *,
>                                                                     const struct bpf_insn *));
> 
> +static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog *prog,
> +                                                const void *ctx,
> +                                                bpf_dispatcher_fn dfunc);
> +
>  static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
>                                            const void *ctx,
>                                            bpf_dispatcher_fn dfunc)
> @@ -641,14 +645,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
>                  u64 start = sched_clock();
>                  unsigned long flags;
> 
> -                ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> +                ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
>                  stats = this_cpu_ptr(prog->stats);
>                  flags = u64_stats_update_begin_irqsave(&stats->syncp);
>                  u64_stats_inc(&stats->cnt);
>                  u64_stats_add(&stats->nsecs, sched_clock() - start);
>                  u64_stats_update_end_irqrestore(&stats->syncp, flags);
>          } else {
> -                ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> +                ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
>          }
>          return ret;
>  }
> @@ -952,12 +956,31 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
>  void bpf_jit_compile(struct bpf_prog *prog);
>  bool bpf_jit_needs_zext(void);
>  bool bpf_jit_supports_subprog_tailcalls(void);
> +bool bpf_jit_supports_tail_call_cnt_ptr(void);
>  bool bpf_jit_supports_kfunc_call(void);
>  bool bpf_jit_supports_far_kfunc_call(void);
>  bool bpf_jit_supports_exceptions(void);
>  void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
>  bool bpf_helper_changes_pkt_data(void *func);
> 
> +static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog *prog,
> +                                                const void *ctx,
> +                                                bpf_dispatcher_fn dfunc)
> +{
> +        struct bpf_prog_run_ctx run_ctx = {};
> +        u32 ret, tcc = 0;
> +
> +        run_ctx.ctx = ctx;
> +        run_ctx.tail_call_cnt = &tcc;
> +
> +        if (bpf_jit_supports_tail_call_cnt_ptr() && prog->jited)
> +                ret = dfunc(&run_ctx, prog->insnsi, prog->bpf_func);
> +        else
> +                ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> +
> +        return ret;
> +}
> +
>  static inline bool bpf_dump_raw_ok(const struct cred *cred)
>  {
>          /* Reconstruction of call-sites is dependent on kallsyms,
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index ea6843be2616c..80b20e99456f0 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2915,6 +2915,15 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void)
>          return false;
>  }
> 
> +/* Return TRUE if the JIT backend supports tail call count pointer in
> + * tailcall context.
> + */
> +bool __weak bpf_jit_supports_tail_call_cnt_ptr(void)
> +{
> +        return false;
> +}
> +EXPORT_SYMBOL(bpf_jit_supports_tail_call_cnt_ptr);
> +
>  bool __weak bpf_jit_supports_kfunc_call(void)
>  {
>          return false;

Why use EXPORT_SYMBOL here?
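
Separately, to check my reading of the new calling convention: as I
understand it, the entry program no longer receives the raw ctx; it
receives the wrapper and unwraps it in its prologue. A plain-C stand-in
for what the JIT emits (illustration only, not actual kernel code;
run_entry_prog() is a made-up name):

struct bpf_prog_run_ctx {
        const void *ctx;        /* original ctx, e.g. skb or xdp_buff */
        u32 *tail_call_cnt;     /* tcc_ptr shared by the tailcall chain */
};

static u32 run_entry_prog(const struct bpf_prog *prog, const void *orig_ctx)
{
        u32 tcc = 0;
        struct bpf_prog_run_ctx run_ctx = {
                .ctx = orig_ctx,
                .tail_call_cnt = &tcc,
        };

        /* The JITed prologue of the entry prog then does, per the x86
         * hunk above:
         *   mov rax, qword ptr [rdi + 8]   ; rax = run_ctx.tail_call_cnt
         *   mov rdi, qword ptr [rdi]       ; rdi = run_ctx.ctx
         * so the program body still sees the original ctx in rdi and
         * the tail call count pointer in rax.
         */
        return prog->bpf_func(&run_ctx, prog->insnsi);
}

If that is right, subprogs keep receiving rax from their caller (the
nops only preserve the prologue layout), and the entry prologue pays
two extra loads instead of the old push/call/pop dance.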
> 
> It's to avoid these build errors:
> 
> ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr" [net/sched/act_bpf.ko] undefined!
> ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr" [net/sched/cls_bpf.ko] undefined!
> ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr" [net/netfilter/xt_bpf.ko] undefined!
> ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr" [net/ipv6/ipv6.ko] undefined!
> 
> I'm not familiar with this build error. Is it OK to use EXPORT_SYMBOL
> here?
> 
> Thanks,
> Leon
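
These modpost errors are expected: __bpf_prog_run_dfunc() is
static __always_inline in filter.h, so every module that ends up
calling bpf_prog_run() (act_bpf, cls_bpf, xt_bpf, ipv6) inlines the
call to bpf_jit_supports_tail_call_cnt_ptr() and needs that symbol
resolvable at module load time, hence the EXPORT_SYMBOL. One way to
avoid the export altogether could be to latch the capability into the
prog once at JIT time, so the inline helper only touches per-prog
data. A rough, untested sketch; the tail_call_cnt_ptr flag below is
made up for illustration:

/* vmlinux side: record the capability once the prog is JITed,
 * e.g. after bpf_int_jit_compile() succeeds (hypothetical flag):
 */
        prog->aux->tail_call_cnt_ptr = prog->jited &&
                                       bpf_jit_supports_tail_call_cnt_ptr();

/* filter.h: the inline helper then reads prog data only and no longer
 * references a vmlinux-only symbol from module code:
 */
static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog *prog,
                                                const void *ctx,
                                                bpf_dispatcher_fn dfunc)
{
        u32 tcc = 0;
        struct bpf_prog_run_ctx run_ctx = {
                .ctx = ctx,
                .tail_call_cnt = &tcc,
        };

        return dfunc(prog->aux->tail_call_cnt_ptr ? (const void *)&run_ctx : ctx,
                     prog->insnsi, prog->bpf_func);
}

That would drop the new export and keep modules oblivious to the JIT
capability, at the cost of one extra bit in struct bpf_prog_aux.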