The new helper should be used to convert deltas of values returned by
bpf_get_cpu_time_counter() into nanoseconds. It is not designed to do a
full conversion of time counter values to CLOCK_MONOTONIC_RAW
nanoseconds and cannot guarantee monotonicity of two independent
values; rather, it converts the difference between two close-enough
values of the CPU timestamp counter into nanoseconds. The helper is
JITted into just a few instructions and adds as little overhead as
possible, which makes it a good fit for benchmark use cases. When the
kfunc is not JITted it simply returns its argument, because in that
case the kfunc from the previous patch already returns values in
nanoseconds; the verifier then optimizes the call into a plain register
move.

Reviewed-by: Eduard Zingerman <eddyz87@xxxxxxxxx>
Acked-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
Signed-off-by: Vadim Fedorenko <vadfed@xxxxxxxx>
---
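Not part of the patch, just for context: a minimal sketch of the
intended benchmark usage. The attach point, program name and kfunc
declarations below are illustrative (real programs would normally take
the declarations from vmlinux.h):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* kfuncs added by this series; declarations are illustrative */
extern u64 bpf_get_cpu_time_counter(void) __weak __ksym;
extern u64 bpf_cpu_time_counter_to_ns(u64 counter) __weak __ksym;

char LICENSE[] SEC("license") = "GPL";

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(bench_section)
{
	u64 start, delta_ns;

	start = bpf_get_cpu_time_counter();
	/* code under measurement goes here */
	delta_ns = bpf_cpu_time_counter_to_ns(bpf_get_cpu_time_counter() - start);
	bpf_printk("section took %llu ns", delta_ns);
	return 0;
}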
 arch/x86/net/bpf_jit_comp.c   | 29 ++++++++++++++++++++++++++++-
 arch/x86/net/bpf_jit_comp32.c |  1 +
 include/linux/bpf.h           |  1 +
 kernel/bpf/helpers.c          |  6 ++++++
 kernel/bpf/verifier.c         |  9 ++++++++-
 5 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 284696d69df4..8ff8d7436fc9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,6 +9,7 @@
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
+#include <linux/clocksource.h>
 #include <linux/memory.h>
 #include <linux/sort.h>
 #include <asm/extable.h>
@@ -2287,6 +2288,31 @@ st:			if (is_imm8(insn->off))
 				break;
 			}
 
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+			    imm32 == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns) &&
+			    bpf_jit_inlines_kfunc_call(imm32)) {
+				struct cyc2ns_data data;
+				u32 mult, shift;
+
+				/* stable TSC runs with fixed frequency and
+				 * transformation coefficients are also fixed
+				 */
+				cyc2ns_read_begin(&data);
+				mult = data.cyc2ns_mul;
+				shift = data.cyc2ns_shift;
+				cyc2ns_read_end();
+				/* imul RAX, RDI, mult */
+				maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
+				EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
+					    mult);
+
+				/* shr RAX, shift (which is less than 64) */
+				maybe_emit_1mod(&prog, BPF_REG_0, true);
+				EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
+
+				break;
+			}
+
 			func = (u8 *) __bpf_call_base + imm32;
 			if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
 				LOAD_TAIL_CALL_CNT_PTR(stack_depth);
@@ -3902,7 +3928,8 @@ bool bpf_jit_supports_timed_may_goto(void)
 /* x86-64 JIT can inline kfunc */
 bool bpf_jit_inlines_kfunc_call(s32 imm)
 {
-	if (imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) &&
+	if ((imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) ||
+	     imm == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns)) &&
 	    cpu_feature_enabled(X86_FEATURE_TSC) &&
 	    using_native_sched_clock() && sched_clock_stable())
 		return true;
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 68511888eb27..83176a07fc08 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -12,6 +12,7 @@
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
+#include <linux/clocksource.h>
 #include <asm/cacheflush.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6cf9138b2437..fc03a3805b36 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3391,6 +3391,7 @@ u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 /* Inlined kfuncs */
 u64 bpf_get_cpu_time_counter(void);
+u64 bpf_cpu_time_counter_to_ns(u64 counter);
 
 #if defined(CONFIG_NET)
 bool bpf_sock_common_is_valid_access(int off, int size,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 26f71e2438d2..a176bd5a33d0 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3205,6 +3205,11 @@ __bpf_kfunc u64 bpf_get_cpu_time_counter(void)
 	return ktime_get_raw_fast_ns();
 }
 
+__bpf_kfunc u64 bpf_cpu_time_counter_to_ns(u64 counter)
+{
+	return counter;
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -3306,6 +3311,7 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
 BTF_ID_FLAGS(func, bpf_local_irq_save)
 BTF_ID_FLAGS(func, bpf_local_irq_restore)
 BTF_ID_FLAGS(func, bpf_get_cpu_time_counter)
+BTF_ID_FLAGS(func, bpf_cpu_time_counter_to_ns, KF_FASTCALL)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index aea1040b4462..3a908cf24e45 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12007,6 +12007,7 @@ enum special_kfunc_type {
 	KF_bpf_iter_num_destroy,
 	KF_bpf_set_dentry_xattr,
 	KF_bpf_remove_dentry_xattr,
+	KF_bpf_cpu_time_counter_to_ns,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -12040,6 +12041,7 @@ BTF_ID(func, bpf_iter_css_task_new)
 BTF_ID(func, bpf_set_dentry_xattr)
 BTF_ID(func, bpf_remove_dentry_xattr)
 #endif
+BTF_ID(func, bpf_cpu_time_counter_to_ns)
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
@@ -12096,6 +12098,7 @@ BTF_ID(func, bpf_remove_dentry_xattr)
 BTF_ID_UNUSED
 BTF_ID_UNUSED
 #endif
+BTF_ID(func, bpf_cpu_time_counter_to_ns)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -21246,6 +21249,9 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 	if (!bpf_jit_supports_far_kfunc_call())
 		insn->imm = BPF_CALL_IMM(desc->addr);
+	/* if JIT will inline kfunc verifier shouldn't change the code */
+	if (bpf_jit_inlines_kfunc_call(insn->imm))
+		return 0;
 	if (insn->off)
 		return 0;
 	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
@@ -21310,7 +21316,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
 						node_offset_reg, insn, insn_buf, cnt);
 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
-		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast] ||
+		   desc->func_id == special_kfunc_list[KF_bpf_cpu_time_counter_to_ns]) {
 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
 		*cnt = 1;
 	} else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
-- 
2.47.1
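For readers unfamiliar with the cyc2ns machinery: with a stable TSC the
JIT bakes in the same mult/shift coefficients the native sched_clock()
path uses, so the emitted imul+shr pair computes
ns = (delta * mult) >> shift. A C sketch of what the two instructions
do (illustrative only; tsc_delta_to_ns_sketch is a made-up name):

/* Mirrors "imul RAX, RDI, mult; shr RAX, shift": the delta arrives in
 * RDI (first argument), nanoseconds leave in RAX (return value). The
 * 64-bit multiply can wrap for very large deltas, one more reason the
 * helper is only meant for deltas of close-enough counter values.
 */
static inline u64 tsc_delta_to_ns_sketch(u64 delta, u32 mult, u32 shift)
{
	return (delta * mult) >> shift;
}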