Re: [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2024/1/5 12:15, Alexei Starovoitov wrote:
> On Thu, Jan 4, 2024 at 6:23 AM Leon Hwang <hffilwlqm@xxxxxxxxx> wrote:
>>
>>
> 
> Other alternatives?

I've finish the POC of an alternative, which passed all tailcall
selftests including these tailcall hierarchy ones.

In this alternative, I use a new bpf_prog_run_ctx to wrap the original
ctx and the tcc_ptr, then get the tcc_ptr and recover the original ctx
in JIT.

Then, to avoid breaking runtime with tailcall on other arch, I add an
arch-related check bpf_jit_supports_tail_call_cnt_ptr() to determin
whether to use bpf_prog_run_ctx.

Here's the diff:

 diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4065bdcc5b2a4..56cea2676863e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -259,7 +259,7 @@ struct jit_context {
 /* Number of bytes emit_patch() needs to generate instructions */
 #define X86_PATCH_SIZE		5
 /* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET	(22 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET	(16 + ENDBR_INSN_SIZE)

 static void push_r12(u8 **pprog)
 {
@@ -407,21 +407,19 @@ static void emit_prologue(u8 **pprog, u32
stack_depth, bool ebpf_from_cbpf,
 	emit_nops(&prog, X86_PATCH_SIZE);
 	if (!ebpf_from_cbpf) {
 		if (tail_call_reachable && !is_subprog) {
-			/* When it's the entry of the whole tailcall context,
-			 * zeroing rax means initialising tail_call_cnt.
-			 */
-			EMIT2(0x31, 0xC0);       /* xor eax, eax */
-			EMIT1(0x50);             /* push rax */
-			/* Make rax as ptr that points to tail_call_cnt. */
-			EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */
-			EMIT1_off32(0xE8, 2);    /* call main prog */
-			EMIT1(0x59);             /* pop rcx, get rid of tail_call_cnt */
-			EMIT1(0xC3);             /* ret */
+			/* Make rax as tcc_ptr. */
+			EMIT4(0x48, 0x8B, 0x47, 0x08); /* mov rax, qword ptr [rdi + 8] */
 		} else {
-			/* Keep the same instruction size. */
-			emit_nops(&prog, 13);
+			/* Keep the same instruction layout. */
+			emit_nops(&prog, 4);
 		}
 	}
+	if (!is_subprog)
+		/* Recover the original ctx. */
+		EMIT3(0x48, 0x8B, 0x3F); /* mov rdi, qword ptr [rdi] */
+	else
+		/* Keep the same instruction layout. */
+		emit_nops(&prog, 3);
 	/* Exception callback receives FP as third parameter */
 	if (is_exception_cb) {
 		EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
@@ -3152,6 +3150,12 @@ bool bpf_jit_supports_subprog_tailcalls(void)
 	return true;
 }

+/* Indicate the JIT backend supports tail call count pointer in
tailcall context. */
+bool bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+	return true;
+}
+
 void bpf_jit_free(struct bpf_prog *prog)
 {
 	if (prog->jited) {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7671530d6e4e0..fea4326c27d31 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1919,6 +1919,11 @@ int bpf_prog_array_copy(struct bpf_prog_array
*old_array,
 			u64 bpf_cookie,
 			struct bpf_prog_array **new_array);

+struct bpf_prog_run_ctx {
+	const void *ctx;
+	u32 *tail_call_cnt;
+};
+
 struct bpf_run_ctx {};

 struct bpf_cg_run_ctx {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 68fb6c8142fec..c1c035c44b4ab 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -629,6 +629,10 @@ typedef unsigned int (*bpf_dispatcher_fn)(const
void *ctx,
 					  unsigned int (*bpf_func)(const void *,
 								   const struct bpf_insn *));

+static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog
*prog,
+						const void *ctx,
+						bpf_dispatcher_fn dfunc);
+
 static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 					  const void *ctx,
 					  bpf_dispatcher_fn dfunc)
@@ -641,14 +645,14 @@ static __always_inline u32 __bpf_prog_run(const
struct bpf_prog *prog,
 		u64 start = sched_clock();
 		unsigned long flags;

-		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+		ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
 		stats = this_cpu_ptr(prog->stats);
 		flags = u64_stats_update_begin_irqsave(&stats->syncp);
 		u64_stats_inc(&stats->cnt);
 		u64_stats_add(&stats->nsecs, sched_clock() - start);
 		u64_stats_update_end_irqrestore(&stats->syncp, flags);
 	} else {
-		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+		ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
 	}
 	return ret;
 }
@@ -952,12 +956,31 @@ struct bpf_prog *bpf_int_jit_compile(struct
bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
 bool bpf_jit_supports_subprog_tailcalls(void);
+bool bpf_jit_supports_tail_call_cnt_ptr(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_jit_supports_exceptions(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64
sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);

+static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog
*prog,
+						const void *ctx,
+						bpf_dispatcher_fn dfunc)
+{
+	struct bpf_prog_run_ctx run_ctx = {};
+	u32 ret, tcc = 0;
+
+	run_ctx.ctx = ctx;
+	run_ctx.tail_call_cnt = &tcc;
+
+	if (bpf_jit_supports_tail_call_cnt_ptr() && prog->jited)
+		ret = dfunc(&run_ctx, prog->insnsi, prog->bpf_func);
+	else
+		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+
+	return ret;
+}
+
 static inline bool bpf_dump_raw_ok(const struct cred *cred)
 {
 	/* Reconstruction of call-sites is dependent on kallsyms,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ea6843be2616c..80b20e99456f0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2915,6 +2915,15 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void)
 	return false;
 }

+/* Return TRUE if the JIT backend supports tail call count pointer in
tailcall
+ * context.
+ */
+bool __weak bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+	return false;
+}
+EXPORT_SYMBOL(bpf_jit_supports_tail_call_cnt_ptr);
+
 bool __weak bpf_jit_supports_kfunc_call(void)
 {
 	return false;

Why use EXPORT_SYMBOL here?

It's to avoid the building error.

ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/sched/act_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/sched/cls_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/netfilter/xt_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr" [net/ipv6/ipv6.ko]
undefined!

I'm not familiar with this building error. Is it OK to use EXPORT_SYMBOL
here?

Thanks,
Leon





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux