On 4/29/24 8:20 AM, Xu Kuohai wrote:
From: Xu Kuohai <xukuohai@xxxxxxxxxx>
As shown in the ExpandSEXTINREG function in [1], LLVM generates SRL and
SRA instruction pair to implement sign-extension. For x86 and arm64,
this instruction pair will be folded to a single instruction, but the
LLVM BPF backend does not do such folding.
With -mcpu=v4, sign-extention will be generated and in selftest
test_progs-cpuv4 will test with -mcpu=v4. The cpu v4 support
is added in llvm18, and I hope once llvm18 is widely available, we
might be able to make test_progs-cpuv4 as the default test_progs.
So I think this optimization is not needed.
For example, the following C code:
long f(int x)
{
return x;
}
will be compiled to:
r0 = r1
r0 <<= 0x20
r0 s>>= 0x20
exit
Since 32-bit to 64-bit sign-extension is a common case and we already
have MOVSX instruction for sign-extension, this patch tries to fold
the 32-bit to 64-bit LSH and ARSH pair to a single MOVSX instruction.
[1] https://github.com/llvm/llvm-project/blob/4523a267829c807f3fc8fab8e5e9613985a51565/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp#L1228
Signed-off-by: Xu Kuohai <xukuohai@xxxxxxxxxx>
---
include/linux/filter.h | 8 ++++++++
kernel/bpf/verifier.c | 46 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7a27f19bf44d..7cc90a32ed9a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -173,6 +173,14 @@ struct ctl_table_header;
.off = 0, \
.imm = 0 })
+#define BPF_MOV64_SEXT_REG(DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
#define BPF_MOV32_REG(DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_MOV | BPF_X, \
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4e474ef44e9c..6bcee052d90d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20659,6 +20659,49 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env)
return 0;
}
+static bool is_sext32(struct bpf_insn *insn1, struct bpf_insn *insn2)
+{
+ if (insn1->code != (BPF_ALU64 | BPF_K | BPF_LSH) || insn1->imm != 32)
+ return false;
+
+ if (insn2->code != (BPF_ALU64 | BPF_K | BPF_ARSH) || insn2->imm != 32)
+ return false;
+
+ if (insn1->dst_reg != insn2->dst_reg)
+ return false;
+
+ return true;
+}
+
+/* LLVM generates sign-extension with LSH and ARSH pair, replace it with MOVSX.
+ *
+ * Before:
+ * DST <<= 32
+ * DST s>>= 32
+ *
+ * After:
+ * DST = (s32)DST
+ */
+static int optimize_sext32_insns(struct bpf_verifier_env *env)
+{
+ int i, err;
+ int insn_cnt = env->prog->len;
+ struct bpf_insn *insn = env->prog->insnsi;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (i + 1 >= insn_cnt || !is_sext32(insn, insn + 1))
+ continue;
+ /* patch current insn to MOVSX */
+ *insn = BPF_MOV64_SEXT_REG(insn->dst_reg, insn->dst_reg, 32);
+ /* remove next insn */
+ err = verifier_remove_insns(env, i + 1, 1);
+ if (err)
+ return err;
+ insn_cnt--;
+ }
+ return 0;
+}
+
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
@@ -21577,6 +21620,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret == 0)
ret = optimize_bpf_loop(env);
+ if (ret == 0)
+ ret = optimize_sext32_insns(env);
+
if (is_priv) {
if (ret == 0)
opt_hard_wire_dead_code_branches(env);