Pu Lehui <pulehui@xxxxxxxxxxxxxxx> writes: > From: Pu Lehui <pulehui@xxxxxxxxxx> > > Optimize bswap instructions by rev8 Zbb instruction combined with srli > instruction. Also optimize 16-bit zero-extension with Zbb support. > > Signed-off-by: Pu Lehui <pulehui@xxxxxxxxxx> > --- > arch/riscv/net/bpf_jit.h | 67 +++++++++++++++++++++++++++++++++ > arch/riscv/net/bpf_jit_comp64.c | 50 +----------------------- > 2 files changed, 69 insertions(+), 48 deletions(-) > > diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h > index 944bdd6e4..a04eed672 100644 > --- a/arch/riscv/net/bpf_jit.h > +++ b/arch/riscv/net/bpf_jit.h > @@ -1135,12 +1135,79 @@ static inline void emit_sextw(u8 rd, u8 rs, struct rv_jit_context *ctx) > emit_addiw(rd, rs, 0, ctx); > } > > +static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx) > +{ > + if (rvzbb_enabled()) { > + emit(rvzbb_zexth(rd, rs), ctx); > + } else { > + emit_slli(rd, rs, 48, ctx); > + emit_srli(rd, rd, 48, ctx); > + } > +} > + Prefer an early exit here. 
> static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx) > { > emit_slli(rd, rs, 32, ctx); > emit_srli(rd, rd, 32, ctx); > } > > +static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx) > +{ > + if (rvzbb_enabled()) { > + int bits = 64 - imm; > + > + emit(rvzbb_rev8(rd, rd), ctx); > + if (bits) > + emit_srli(rd, rd, bits, ctx); > + } else { > + emit_li(RV_REG_T2, 0, ctx); > + > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); > + emit_srli(rd, rd, 8, ctx); > + if (imm == 16) > + goto out_be; > + > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); > + emit_srli(rd, rd, 8, ctx); > + > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); > + emit_srli(rd, rd, 8, ctx); > + if (imm == 32) > + goto out_be; > + > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); > + emit_srli(rd, rd, 8, ctx); > + > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); > + emit_srli(rd, rd, 8, ctx); > + > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); > + emit_srli(rd, rd, 8, ctx); > + > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); > + emit_srli(rd, rd, 8, ctx); > +out_be: > + emit_andi(RV_REG_T1, rd, 0xff, ctx); > + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); > + > + emit_mv(rd, RV_REG_T2, ctx); > + } > +} Definitely use an early exit for this one! This function really showcases why Zbb is nice! ;-) I'll take the next revision of the series for a test! Björn