On Tue, Sep 05, 2023 at 09:06:15PM +0000, Puranjay Mohan wrote: > The cpuv4 added the support of an instruction that is similar to load > but also sign-extends the result after the load. > > BPF_MEMSX | <size> | BPF_LDX means dst = *(signed size *) (src + offset) > here <size> can be one of BPF_B, BPF_H, BPF_W. > > ARM32 has instructions to load a byte or a half word with sign > extension into a 32bit register. As the JIT uses two 32 bit registers > to simulate a 64-bit BPF register, an extra instruction is emitted to > sign-extent the result up to the second register. > > Signed-off-by: Puranjay Mohan <puranjay12@xxxxxxxxx> > --- > arch/arm/net/bpf_jit_32.c | 69 ++++++++++++++++++++++++++++++++++++++- > arch/arm/net/bpf_jit_32.h | 2 ++ > 2 files changed, 70 insertions(+), 1 deletion(-) > > diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c > index b26579da770e..f7c162479cf2 100644 > --- a/arch/arm/net/bpf_jit_32.c > +++ b/arch/arm/net/bpf_jit_32.c > @@ -333,6 +333,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) > #define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) > #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) > > +#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off) > +#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off) > + > #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) > #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) > #define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) > @@ -1026,6 +1029,24 @@ static bool is_ldst_imm(s16 off, const u8 size) > return -off_max <= off && off <= off_max; > } > > +static bool is_ldst_imm8(s16 off, const u8 size) > +{ > + s16 off_max = 0; > + > + switch (size) { > + case BPF_B: > + off_max = 0xff; > + break; > + case BPF_W: > + off_max = 0xfff; > + break; > + case BPF_H: > + off_max = 
0xff;
> +		break;
> +	}
> +	return -off_max <= off && off <= off_max;
> +}
> +
>  /* *(size *)(dst + off) = src */
>  static inline void emit_str_r(const s8 dst, const s8 src[],
>  			      s16 off, struct jit_ctx *ctx, const u8 sz){
> @@ -1105,6 +1126,45 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
>  	arm_bpf_put_reg64(dst, rd, ctx);
>  }
>
> +/* dst = *(signed size*)(src + off) */
> +static inline void emit_ldsx_r(const s8 dst[], const s8 src,
> +			       s16 off, struct jit_ctx *ctx, const u8 sz){
> +	const s8 *tmp = bpf2a32[TMP_REG_1];
> +	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
> +	s8 rm = src;
> +
> +	if (!is_ldst_imm8(off, sz)) {
> +		emit_a32_mov_i(tmp[0], off, ctx);
> +		emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);

Hmm. This looks inefficient when "off" is able to fit in an immediate.
Please try:

	int add_off;

	if (!is_ldst_imm8(off, sz)) {
		add_off = imm8m(off);
		if (add_off > 0) {
			emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
			rm = tmp[0];
		} else {
			emit_a32_mov_i(tmp[0], off, ctx);
			emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
			rm = tmp[0];
		}
		off = 0;

> +	} else if (rd[1] == rm) {
> +		emit(ARM_MOV_R(tmp[0], rm), ctx);
> +		rm = tmp[0];

Why do you need this? rd and rm can be the same for LDRS[BH].

> +	}
> +	switch (sz) {
> +	case BPF_B:
> +		/* Load a Byte with sign extension*/
> +		emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
> +		/* Carry the sign extension to upper 32 bits */
> +		emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
> +		break;
> +	case BPF_H:
> +		/* Load a HalfWord with sign extension*/
> +		emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
> +		/* Carry the sign extension to upper 32 bits */
> +		emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
> +		break;
> +	case BPF_W:
> +		/* Load a Word*/
> +		emit(ARM_LDR_I(rd[1], rm, off), ctx);
> +		/* Carry the sign extension to upper 32 bits */
> +		emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);

The last instruction extending to the upper 32 bits is the same in each
of these cases, so is there any reason not to do it outside the switch
statement?

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!