[PATCH bpf-next] arm64, bpf: Use bpf_prog_pack for arm64 bpf trampoline

We use bpf_prog_pack to aggregate BPF programs into huge pages and
relieve iTLB pressure on the system; this was merged for ARM64 in [1].
Apply the same mechanism to BPF trampolines as well. This improves the
performance of fentry and struct_ops programs.

[1] https://lore.kernel.org/bpf/20240228141824.119877-1-puranjay12@xxxxxxxxx/

Signed-off-by: Puranjay Mohan <puranjay12@xxxxxxxxx>
---
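A note for reviewers, not part of the commit message: with prog_pack,
trampoline memory is carved out of a shared executable huge page and
handed to the arch code read-only, so the JITed bytes are staged in a
writable scratch buffer and committed with bpf_arch_text_copy(). A
rough sketch of the expected lifecycle follows; the helper name is made
up for illustration, and the real call sites live in
kernel/bpf/trampoline.c and may differ in detail:

	/* Illustrative only; not part of this patch. */
	static int tramp_image_lifecycle(struct bpf_tramp_image *im,
					 const struct btf_func_model *m,
					 u32 flags,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr, unsigned int size)
	{
		void *ro_image;
		int ret;

		/* sub-allocate from a shared executable huge page */
		ro_image = arch_alloc_bpf_trampoline(size);
		if (!ro_image)
			return -ENOMEM;

		/* JIT into scratch memory, then copy into ro_image */
		ret = arch_prepare_bpf_trampoline(im, ro_image,
						  ro_image + size, m, flags,
						  tlinks, func_addr);
		if (ret < 0)
			arch_free_bpf_trampoline(ro_image, size);
		return ret;
	}
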
 arch/arm64/net/bpf_jit_comp.c | 55 +++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 5afc7a525eca..c5b461dda438 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2076,7 +2076,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 		/* store return value */
 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
 		/* reserve a nop for bpf_tramp_image_put */
-		im->ip_after_call = ctx->image + ctx->idx;
+		im->ip_after_call = ctx->ro_image + ctx->idx;
 		emit(A64_NOP, ctx);
 	}
 
@@ -2091,7 +2091,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 				run_ctx_off, false);
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		im->ip_epilogue = ctx->image + ctx->idx;
+		im->ip_epilogue = ctx->ro_image + ctx->idx;
 		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
 		emit_call((const u64)__bpf_tramp_exit, ctx);
 	}
@@ -2124,9 +2124,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 		emit(A64_RET(A64_R(10)), ctx);
 	}
 
-	if (ctx->image)
-		bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
-
 	kfree(branches);
 
 	return ctx->idx;
@@ -2169,14 +2166,43 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
 }
 
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
-				void *image_end, const struct btf_func_model *m,
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+	return bpf_prog_pack_alloc(size, jit_fill_hole);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+	bpf_prog_pack_free(image, size);
+}
+
+void arch_protect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
+				void *ro_image_end, const struct btf_func_model *m,
 				u32 flags, struct bpf_tramp_links *tlinks,
 				void *func_addr)
 {
 	int ret, nregs;
+	void *image, *tmp;
+	u32 size = ro_image_end - ro_image;
+
+	/* image doesn't need to be in module memory range, so we can
+	 * use kvmalloc.
+	 */
+	image = kvmalloc(size, GFP_KERNEL);
+	if (!image)
+		return -ENOMEM;
+
 	struct jit_ctx ctx = {
 		.image = image,
+		.ro_image = ro_image,
 		.idx = 0,
 	};
 
@@ -2185,15 +2211,29 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
-	if (nregs > 8)
-		return -ENOTSUPP;
+	/* don't leak the scratch image on the early-exit path */
+	if (nregs > 8) {
+		ret = -ENOTSUPP;
+		goto out;
+	}
 
-	jit_fill_hole(image, (unsigned int)(image_end - image));
+	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
 	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
 
-	if (ret > 0 && validate_code(&ctx) < 0)
+	if (ret > 0 && validate_code(&ctx) < 0) {
 		ret = -EINVAL;
+		goto out;
+	}
 
 	if (ret > 0)
 		ret *= AARCH64_INSN_SIZE;
 
+	tmp = bpf_arch_text_copy(ro_image, image, size);
+	if (IS_ERR(tmp)) {
+		ret = PTR_ERR(tmp);
+		goto out;
+	}
+
+	bpf_flush_icache(ro_image, ro_image + size);
+out:
+	kvfree(image);
 	return ret;
 }
 
-- 
2.40.1
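
A closing note on the im->ip_after_call / im->ip_epilogue hunks: those
addresses are later text-poked by the core (bpf_tramp_image_put() turns
the reserved NOP into a branch to the epilogue), so they must point
into the final executable image, i.e. ctx->ro_image, not into the
kvmalloc'ed scratch buffer. The commit step at the end of
arch_prepare_bpf_trampoline() then follows the same shape as the
regular JIT path from [1]; a minimal sketch, assuming the helpers
behave as they do on the program side:

	/* Minimal sketch of the commit step; illustrative only. */
	static int commit_ro_image(void *ro_image, void *rw_image, u32 size)
	{
		void *tmp;

		/* write the read-only pack memory via a writable alias */
		tmp = bpf_arch_text_copy(ro_image, rw_image, size);
		if (IS_ERR(tmp))
			return PTR_ERR(tmp);

		/* make the new instructions visible to the I-side */
		bpf_flush_icache(ro_image, ro_image + size);
		return 0;
	}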