ldimm64 is not only used for loading function addresses, and the
NOPs added for padding are impacting performance, so avoid them
when not necessary.

On QEMU mac99, with the patch:

test_bpf: #829 ALU64_MOV_K: all immediate value magnitudes jited:1 167436810 PASS
test_bpf: #831 ALU64_OR_K: all immediate value magnitudes jited:1 170702940 PASS

Without the patch:

test_bpf: #829 ALU64_MOV_K: all immediate value magnitudes jited:1 173012360 PASS
test_bpf: #831 ALU64_OR_K: all immediate value magnitudes jited:1 176424090 PASS

That's a 3.5% performance improvement.

Fixes: f9320c49993c ("powerpc/bpf: Update ldimm64 instructions during extra pass")
Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
---
 arch/powerpc/net/bpf_jit_comp.c   | 3 ++-
 arch/powerpc/net/bpf_jit_comp32.c | 5 +++--
 arch/powerpc/net/bpf_jit_comp64.c | 5 +++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 43e634126514..206b698723a3 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -68,7 +68,8 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
 			 * of the JITed sequence remains unchanged.
 			 */
 			ctx->idx = tmp_idx;
-		} else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+		} else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+			   insn[i].src_reg == BPF_PSEUDO_FUNC) {
 			tmp_idx = ctx->idx;
 			ctx->idx = addrs[i] / 4;
 #ifdef CONFIG_PPC32
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index a379b0ce19ff..878f8a88d83e 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -960,8 +960,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
 			PPC_LI32(dst_reg, (u32)insn[i].imm);
 			/* padding to allow full 4 instructions for later patching */
-			for (j = ctx->idx - tmp_idx; j < 4; j++)
-				EMIT(PPC_RAW_NOP());
+			if (insn[i].src_reg == BPF_PSEUDO_FUNC)
+				for (j = ctx->idx - tmp_idx; j < 4; j++)
+					EMIT(PPC_RAW_NOP());
 			/* Adjust for two bpf instructions */
 			addrs[++i] = ctx->idx * 4;
 			break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 29ee306d6302..af8bdb5553cd 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -938,8 +938,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			tmp_idx = ctx->idx;
 			PPC_LI64(dst_reg, imm64);
 			/* padding to allow full 5 instructions for later patching */
-			for (j = ctx->idx - tmp_idx; j < 5; j++)
-				EMIT(PPC_RAW_NOP());
+			if (insn[i].src_reg == BPF_PSEUDO_FUNC)
+				for (j = ctx->idx - tmp_idx; j < 5; j++)
+					EMIT(PPC_RAW_NOP());
 			/* Adjust for two bpf instructions */
 			addrs[++i] = ctx->idx * 4;
 			break;
-- 
2.38.1
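
As background on the condition being added: it is the standard eBPF encoding
check for a pseudo-function ldimm64, the only ldimm64 variant whose immediate
(a function address) is rewritten during the extra pass and therefore the only
one that still needs fixed-size padding. Below is a minimal userspace sketch of
that predicate, not part of the patch; it assumes only the UAPI definitions
from <linux/bpf.h> (BPF_PSEUDO_FUNC needs v5.13+ headers), and the helper name
is made up for illustration.

#include <stdbool.h>
#include <stdio.h>
#include <linux/bpf.h>	/* struct bpf_insn, BPF_LD, BPF_IMM, BPF_DW, BPF_PSEUDO_FUNC */

/*
 * Hypothetical helper, not a kernel function: an ldimm64 only needs the
 * fixed-size NOP padding when it loads a function address, i.e. when
 * src_reg is BPF_PSEUDO_FUNC. The check looks at the first of the two
 * instruction slots that make up an ldimm64, which is where code and
 * src_reg live.
 */
static bool ldimm64_needs_padding(const struct bpf_insn *insn)
{
	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
	       insn->src_reg == BPF_PSEUDO_FUNC;
}

int main(void)
{
	/* ldimm64 of a plain 64-bit constant: padding can be skipped */
	struct bpf_insn plain_imm = { .code = BPF_LD | BPF_IMM | BPF_DW, .imm = 0x1234 };
	/* ldimm64 of a function address: padding is still emitted */
	struct bpf_insn func_addr = { .code = BPF_LD | BPF_IMM | BPF_DW,
				      .src_reg = BPF_PSEUDO_FUNC };

	printf("plain imm needs padding: %d\n", ldimm64_needs_padding(&plain_imm));
	printf("func addr needs padding: %d\n", ldimm64_needs_padding(&func_addr));
	return 0;
}

The same predicate gates both the extra-pass fixup in bpf_jit_fixup_addresses()
and the NOP padding in the 32-bit and 64-bit JIT bodies, the idea presumably
being that plain 64-bit immediates are final at the initial pass and so need no
reserved space.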