[PATCH v2 8/8] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When the BPF routine doesn't call any function, the non volatile
registers can be reallocated to volatile registers in order to
avoid having to save them/restore on the stack.

Before this patch, the test #359 ADD default X is:

   0:	7c 64 1b 78 	mr      r4,r3
   4:	38 60 00 00 	li      r3,0
   8:	94 21 ff b0 	stwu    r1,-80(r1)
   c:	60 00 00 00 	nop
  10:	92 e1 00 2c 	stw     r23,44(r1)
  14:	93 01 00 30 	stw     r24,48(r1)
  18:	93 21 00 34 	stw     r25,52(r1)
  1c:	93 41 00 38 	stw     r26,56(r1)
  20:	39 80 00 00 	li      r12,0
  24:	39 60 00 00 	li      r11,0
  28:	3b 40 00 00 	li      r26,0
  2c:	3b 20 00 00 	li      r25,0
  30:	7c 98 23 78 	mr      r24,r4
  34:	7c 77 1b 78 	mr      r23,r3
  38:	39 80 00 42 	li      r12,66
  3c:	39 60 00 00 	li      r11,0
  40:	7d 8c d2 14 	add     r12,r12,r26
  44:	39 60 00 00 	li      r11,0
  48:	7d 83 63 78 	mr      r3,r12
  4c:	82 e1 00 2c 	lwz     r23,44(r1)
  50:	83 01 00 30 	lwz     r24,48(r1)
  54:	83 21 00 34 	lwz     r25,52(r1)
  58:	83 41 00 38 	lwz     r26,56(r1)
  5c:	38 21 00 50 	addi    r1,r1,80
  60:	4e 80 00 20 	blr

After this patch, the same test has become:

   0:	7c 64 1b 78 	mr      r4,r3
   4:	38 60 00 00 	li      r3,0
   8:	94 21 ff b0 	stwu    r1,-80(r1)
   c:	60 00 00 00 	nop
  10:	39 80 00 00 	li      r12,0
  14:	39 60 00 00 	li      r11,0
  18:	39 00 00 00 	li      r8,0
  1c:	38 e0 00 00 	li      r7,0
  20:	7c 86 23 78 	mr      r6,r4
  24:	7c 65 1b 78 	mr      r5,r3
  28:	39 80 00 42 	li      r12,66
  2c:	39 60 00 00 	li      r11,0
  30:	7d 8c 42 14 	add     r12,r12,r8
  34:	39 60 00 00 	li      r11,0
  38:	7d 83 63 78 	mr      r3,r12
  3c:	38 21 00 50 	addi    r1,r1,80
  40:	4e 80 00 20 	blr

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
---
 arch/powerpc/net/bpf_jit.h        | 16 ++++++++++++++++
 arch/powerpc/net/bpf_jit64.h      |  2 +-
 arch/powerpc/net/bpf_jit_comp.c   |  2 ++
 arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++--
 arch/powerpc/net/bpf_jit_comp64.c |  4 ++++
 5 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index a45b8266355d..776abef4d2a0 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -116,6 +116,15 @@ static inline bool is_nearbranch(int offset)
 #define SEEN_STACK	0x40000000 /* uses BPF stack */
 #define SEEN_TAILCALL	0x80000000 /* uses tail calls */
 
+#define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
+
+#ifdef CONFIG_PPC64
+extern const int b2p[MAX_BPF_JIT_REG + 2];
+#else
+extern const int b2p[MAX_BPF_JIT_REG + 1];
+#endif
+
 struct codegen_context {
 	/*
 	 * This is used to track register usage as well
@@ -129,6 +138,7 @@ struct codegen_context {
 	unsigned int seen;
 	unsigned int idx;
 	unsigned int stack_size;
+	int b2p[ARRAY_SIZE(b2p)];
 };
 
 static inline void bpf_flush_icache(void *start, void *end)
@@ -147,11 +157,17 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
 	ctx->seen |= 1 << (31 - i);
 }
 
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+	ctx->seen &= ~(1 << (31 - i));
+}
+
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
 		       u32 *addrs, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
 
 #endif
 
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index b05f2e67bba1..7b713edfa7e2 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -39,7 +39,7 @@
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 2] = {
 	/* function return value */
 	[BPF_REG_0] = 8,
 	/* function arguments */
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index efac89964873..798ac4350a82 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -143,6 +143,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
+	memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
 
 	/* Make sure that the stack is quadword aligned. */
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
@@ -167,6 +168,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		}
 	}
 
+	bpf_jit_realloc_regs(&cgctx);
 	/*
 	 * Pretend to build prologue, given the features we've seen.  This will
 	 * update ctgtx.idx as it pretends to output instructions, then we can
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 29ce802d7534..003843273b43 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -37,7 +37,7 @@
 #define TMP_REG	(MAX_BPF_JIT_REG + 0)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 1] = {
 	/* function return value */
 	[BPF_REG_0] = 12,
 	/* function arguments */
@@ -60,7 +60,7 @@ static const int b2p[] = {
 
 static int bpf_to_ppc(struct codegen_context *ctx, int reg)
 {
-	return b2p[reg];
+	return ctx->b2p[reg];
 }
 
 /* PPC NVR range -- update this if we ever use NVRs below r17 */
@@ -77,6 +77,32 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	return BPF_PPC_STACKFRAME(ctx) - 4;
 }
 
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+	if (ctx->seen & SEEN_FUNC)
+		return;
+
+	while (ctx->seen & SEEN_NVREG_MASK &&
+	      (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+		int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
+		int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+		int i;
+
+		for (i = BPF_REG_0; i <= TMP_REG; i++) {
+			if (ctx->b2p[i] != old)
+				continue;
+			ctx->b2p[i] = new;
+			bpf_set_seen_register(ctx, new);
+			bpf_clear_seen_register(ctx, old);
+			if (i != TMP_REG) {
+				bpf_set_seen_register(ctx, new - 1);
+				bpf_clear_seen_register(ctx, old - 1);
+			}
+			break;
+		}
+	}
+}
+
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8a1f9fb00e78..57a8c1153851 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -64,6 +64,10 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	BUG();
 }
 
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
-- 
2.25.0




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux