On Fri, 2024-11-08 at 16:41 -0800, Vadim Fedorenko wrote:
> New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts
> it into rdtsc ordered call. Other architectures will get JIT
> implementation too if supported. The fallback is to
> __arch_get_hw_counter().
>
> Signed-off-by: Vadim Fedorenko <vadfed@xxxxxxxx>
> ---

Aside from a note below, I think this patch is in good shape.

Acked-by: Eduard Zingerman <eddyz87@xxxxxxxxx>

[...]

> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 06b080b61aa5..4f78ed93ee7f 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -2126,6 +2126,26 @@ st:			if (is_imm8(insn->off))
> 		case BPF_JMP | BPF_CALL: {
> 			u8 *ip = image + addrs[i - 1];
>
> +			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
> +			    imm32 == BPF_CALL_IMM(bpf_get_cpu_cycles)) {
> +				/* Save RDX because RDTSC will use EDX:EAX to return u64 */
> +				emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
> +				if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
> +					EMIT_LFENCE();
> +				EMIT2(0x0F, 0x31);
> +
> +				/* shl RDX, 32 */
> +				maybe_emit_1mod(&prog, BPF_REG_3, true);
> +				EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
> +				/* or RAX, RDX */
> +				maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
> +				EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
> +				/* restore RDX from R11 */
> +				emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);

Note: The default implementation of this kfunc uses __arch_get_hw_counter(),
which is implemented as `(u64)rdtsc_ordered() & S64_MAX`.
Here we don't do `& S64_MAX`.
The masking in __arch_get_hw_counter() was added by commit
77750f78b0b3 ("x86/vdso: Fix gettimeofday masking").
Also, the default implementation does not issue `lfence`.
Not sure if this makes any real-world difference.

> +
> +				break;
> +			}
> +
> 			func = (u8 *) __bpf_call_base + imm32;
> 			if (tail_call_reachable) {
> 				LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);

[...]

> @@ -20488,6 +20510,12 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> 						   node_offset_reg, insn, insn_buf, cnt);
> 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
> 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
> +		if (!verifier_inlines_kfunc_call(env, imm)) {
> +			verbose(env, "verifier internal error: kfunc id %d is not defined in checker\n",
> +				desc->func_id);
> +			return -EFAULT;
> +		}
> +

Nit: I still think it would have been better to move this check so that
it becomes the first conditional:

	if (verifier_inlines_kfunc_call(env, imm)) {
		if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
		    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
			// ...
		} else {
			// report error
		}
	} else if (...) {
		// ... rest of the cases
	}

> 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
> 		*cnt = 1;
> 	} else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
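
P.S. Purely to make the masking note above concrete, here is a tiny
user-space sketch (not kernel code; S64_MAX spelled out by hand, names
are mine) contrasting the raw TSC value the JITed shl/or sequence builds
with the `& S64_MAX` variant that __arch_get_hw_counter() would return:

	#include <stdint.h>
	#include <stdio.h>
	#include <x86intrin.h>	/* __rdtsc() */

	#define S64_MAX_ULL 0x7fffffffffffffffULL

	int main(void)
	{
		/* raw 64-bit TSC, analogous to what the JITed code returns */
		uint64_t raw = __rdtsc();
		/* __arch_get_hw_counter() additionally clears the top bit */
		uint64_t masked = raw & S64_MAX_ULL;

		printf("raw    = %#llx\n", (unsigned long long)raw);
		printf("masked = %#llx\n", (unsigned long long)masked);
		return 0;
	}

In practice the top bit of the TSC should not be set on any real system,
so the two values would normally be identical; whether the difference can
be observed from BPF is the open question above.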