Pu Lehui <pulehui@xxxxxxxxxxxxxxx> writes: > From: Pu Lehui <pulehui@xxxxxxxxxx> > > Implement bpf_arch_text_poke for RV64. For call scenario, > ftrace framework reserve 4 nops for RV64 kernel function > as function entry, and use auipc+jalr instructions to call > kernel or module functions. However, since the auipc+jalr > call instructions is non-atomic operation, we need to use > stop-machine to make sure instruction patching in atomic > context. As for jump scenario, since we only jump inside > the trampoline, a jal instruction is sufficient. Hmm, is that really true? More below! > > Signed-off-by: Pu Lehui <pulehui@xxxxxxxxxx> > --- > arch/riscv/net/bpf_jit.h | 5 ++ > arch/riscv/net/bpf_jit_comp64.c | 131 +++++++++++++++++++++++++++++++- > 2 files changed, 134 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h > index d926e0f7ef57..bf9802a63061 100644 > --- a/arch/riscv/net/bpf_jit.h > +++ b/arch/riscv/net/bpf_jit.h > @@ -573,6 +573,11 @@ static inline u32 rv_fence(u8 pred, u8 succ) > return rv_i_insn(imm11_0, 0, 0, 0, 0xf); > } > > +static inline u32 rv_nop(void) > +{ > + return rv_i_insn(0, 0, 0, 0, 0x13); > +} > + > /* RVC instrutions. */ > > static inline u16 rvc_addi4spn(u8 rd, u32 imm10) > diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c > index bf4721a99a09..fa8b03c52463 100644 > --- a/arch/riscv/net/bpf_jit_comp64.c > +++ b/arch/riscv/net/bpf_jit_comp64.c > @@ -8,6 +8,8 @@ > #include <linux/bitfield.h> > #include <linux/bpf.h> > #include <linux/filter.h> > +#include <linux/memory.h> > +#include <linux/stop_machine.h> > #include "bpf_jit.h" > > #define RV_REG_TCC RV_REG_A6 > @@ -238,7 +240,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) > if (!is_tail_call) > emit_mv(RV_REG_A0, RV_REG_A5, ctx); > emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, > - is_tail_call ? 4 : 0, /* skip TCC init */ > + is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */ > ctx); > } > > @@ -615,6 +617,127 @@ static int add_exception_handler(const struct bpf_insn *insn, > return 0; > } > > +struct text_poke_args { > + void *addr; > + const void *insns; > + size_t len; > + atomic_t cpu_count; > +}; > + > +static int do_text_poke(void *data) > +{ > + int ret = 0; > + struct text_poke_args *patch = data; > + > + if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { > + ret = patch_text_nosync(patch->addr, patch->insns, patch->len); > + atomic_inc(&patch->cpu_count); > + } else { > + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) > + cpu_relax(); > + smp_mb(); > + } > + > + return ret; > +} > + > +static int bpf_text_poke_stop_machine(void *addr, const void *insns, size_t len) > +{ > + struct text_poke_args patch = { > + .addr = addr, > + .insns = insns, > + .len = len, > + .cpu_count = ATOMIC_INIT(0), > + }; > + > + return stop_machine(do_text_poke, &patch, cpu_online_mask); > +} > + > +static int gen_call_or_nops(void *target, void *ip, u32 *insns) > +{ > + int i, ret; > + s64 rvoff; > + struct rv_jit_context ctx; > + > + ctx.ninsns = 0; > + ctx.insns = (u16 *)insns; > + > + if (!target) { > + for (i = 0; i < 4; i++) > + emit(rv_nop(), &ctx); > + return 0; > + } > + > + rvoff = (s64)(target - ip); > + emit(rv_sd(RV_REG_SP, -8, RV_REG_RA), &ctx); > + ret = emit_jump_and_link(RV_REG_RA, rvoff, false, &ctx); > + if (ret) > + return ret; > + emit(rv_ld(RV_REG_RA, -8, RV_REG_SP), &ctx); > + > + return 0; > + > +} > + > +static int bpf_text_poke_call(void *ip, void *old_addr, void *new_addr) > +{ > + int ret; > + u32 old_insns[4], new_insns[4]; > + > + ret = gen_call_or_nops(old_addr, ip + 4, old_insns); > + if (ret) > + return ret; > + > + ret = gen_call_or_nops(new_addr, ip + 4, new_insns); > + if (ret) > + return ret; > + > + mutex_lock(&text_mutex); > + if (memcmp(ip, old_insns, sizeof(old_insns))) { > + ret = -EFAULT; > + goto out; > + } > + > + if (memcmp(ip, new_insns, sizeof(new_insns))) > + ret = bpf_text_poke_stop_machine(ip, new_insns, > sizeof(new_insns)); I'd rather see that you added a patch_text variant to arch/riscv/kernel/patch.c (something like your bpf_text_poke_stop_machine()), and use that here. Might be other users of that as well -- Andy's ftrace patch maybe? :-) > +out: > + mutex_unlock(&text_mutex); > + return ret; > +} > + > +static int bpf_text_poke_jump(void *ip, void *old_addr, void *new_addr) > +{ > + int ret; > + u32 old_insn, new_insn; > + > + old_insn = old_addr ? rv_jal(RV_REG_ZERO, (s64)(old_addr - ip) >> 1) : rv_nop(); > + new_insn = new_addr ? rv_jal(RV_REG_ZERO, (s64)(new_addr - ip) >> 1) : rv_nop(); > + > + mutex_lock(&text_mutex); > + if (memcmp(ip, &old_insn, sizeof(old_insn))) { > + ret = -EFAULT; > + goto out; > + } > + > + if (memcmp(ip, &new_insn, sizeof(new_insn))) > + ret = patch_text_nosync(ip, &new_insn, sizeof(new_insn)); > +out: > + mutex_unlock(&text_mutex); > + return ret; > +} > + > +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, > + void *old_addr, void *new_addr) AFAIU there's nothing in the bpf_arch_text_poke() API that say that BPF_MOD_JUMP is jumps within the trampoline. That is one usage, but not the only one. In general, the jal might not have enough reach. I believe that this needs to be an auipc/jalr pair similar to BPF_MOD_CALL (w/o linked register). And again, thanks for working on the RV trampoline! Björn