Re: [PATCH bpf-next v11 2/4] bpf: add bpf_cpu_time_counter_to_ns helper

On 18/03/2025 00:29, Alexei Starovoitov wrote:
On Mon, Mar 17, 2025 at 3:50 PM Vadim Fedorenko <vadfed@xxxxxxxx> wrote:

The new helper should be used to convert deltas of values returned by
bpf_get_cpu_time_counter() into nanoseconds. It is not designed to do a
full conversion of time counter values to CLOCK_MONOTONIC_RAW
nanoseconds and cannot guarantee monotonicity of two independent
values; rather, it converts the difference of two close-enough CPU
timestamp counter readings into nanoseconds.

This function is JITted into just a few instructions, adding as little
overhead as possible, which makes it a good fit for benchmark use cases.

When the kfunc is not JITted, it returns the value provided as its
argument, because the kfunc from the previous patch already returns
values in nanoseconds.
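(For illustration only, a minimal sketch of the intended usage pattern
from a BPF program; the function name measure_section_ns is made up,
only the two kfunc prototypes come from this series:

	extern __u64 bpf_get_cpu_time_counter(void) __ksym;
	extern __u64 bpf_cpu_time_counter_to_ns(__u64 counter) __ksym;

	static __always_inline __u64 measure_section_ns(void)
	{
		__u64 start, end;

		start = bpf_get_cpu_time_counter();
		/* ... code under measurement ... */
		end = bpf_get_cpu_time_counter();

		/* only the delta of two close readings is meaningful */
		return bpf_cpu_time_counter_to_ns(end - start);
	}
)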

Reviewed-by: Eduard Zingerman <eddyz87@xxxxxxxxx>
Acked-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
Signed-off-by: Vadim Fedorenko <vadfed@xxxxxxxx>
---
  arch/x86/net/bpf_jit_comp.c   | 28 +++++++++++++++++++++++++++-
  arch/x86/net/bpf_jit_comp32.c | 27 ++++++++++++++++++++++++++-
  include/linux/bpf.h           |  1 +
  kernel/bpf/helpers.c          |  6 ++++++
  4 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 92cd5945d630..3e4d45defe2f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,6 +9,7 @@
  #include <linux/filter.h>
  #include <linux/if_vlan.h>
  #include <linux/bpf.h>
+#include <linux/clocksource.h>
  #include <linux/memory.h>
  #include <linux/sort.h>
  #include <asm/extable.h>
@@ -2289,6 +2290,30 @@ st:                      if (is_imm8(insn->off))
                                 break;
                         }

+                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+                           IS_ENABLED(CONFIG_BPF_SYSCALL) &&
+                           imm32 == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns) &&
+                           cpu_feature_enabled(X86_FEATURE_TSC) &&
+                           using_native_sched_clock() && sched_clock_stable()) {

And now this condition is copy-pasted 3 times?!

Yeah, I'll factor it out

+                               struct cyc2ns_data data;
+                               u32 mult, shift;
+
+                               cyc2ns_read_begin(&data);
+                               mult = data.cyc2ns_mul;
+                               shift = data.cyc2ns_shift;
+                               cyc2ns_read_end();

This needs a big comment explaining why this math will be stable
after JIT and for the lifetime of the prog.

It's more or less the same comment as for the JIT of
bpf_get_cpu_time_counter(). I'll add it.
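
For reference, the math the emitted imul/shr pair below is meant to
implement is the cyc2ns scaling (a sketch, not kernel code as-is;
cycles_to_ns_sketch is a made-up name):

	/* matches the JITed imul + shr only while cycles * mult fits in
	 * 64 bits, i.e. for small enough deltas; mul_u64_u32_shr() keeps
	 * the full 96-bit intermediate product instead
	 */
	static inline u64 cycles_to_ns_sketch(u64 cycles, u32 mult, u32 shift)
	{
		return (cycles * mult) >> shift;
	}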


+                               /* imul RAX, RDI, mult */
+                               maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
+                               EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
+                                           mult);
+
+                               /* shr RAX, shift (which is less than 64) */
+                               maybe_emit_1mod(&prog, BPF_REG_0, true);
+                               EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
+
+                               break;
+                       }
+
                         func = (u8 *) __bpf_call_base + imm32;
                         if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
                                 LOAD_TAIL_CALL_CNT_PTR(stack_depth);
@@ -3906,7 +3931,8 @@ bool bpf_jit_inlines_kfunc_call(s32 imm)
  {
         if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
                 return false;
-       if (imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) &&
+       if ((imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) ||
+           imm == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns)) &&
             cpu_feature_enabled(X86_FEATURE_TSC) &&
             using_native_sched_clock() && sched_clock_stable())
                 return true;
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 7f13509c66db..9791a3fb9d69 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -12,6 +12,7 @@
  #include <linux/netdevice.h>
  #include <linux/filter.h>
  #include <linux/if_vlan.h>
+#include <linux/clocksource.h>
  #include <asm/cacheflush.h>
  #include <asm/set_memory.h>
  #include <asm/nospec-branch.h>
@@ -2115,6 +2116,29 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                                         EMIT2(0x0F, 0x31);
                                         break;
                                 }
+                               if (IS_ENABLED(CONFIG_BPF_SYSCALL) &&
+                                   imm32 == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns) &&
+                                   cpu_feature_enabled(X86_FEATURE_TSC) &&
+                                   using_native_sched_clock() && sched_clock_stable()) {
+                                       struct cyc2ns_data data;
+                                       u32 mult, shift;
+
+                                       cyc2ns_read_begin(&data);
+                                       mult = data.cyc2ns_mul;
+                                       shift = data.cyc2ns_shift;
+                                       cyc2ns_read_end();

same here.

+
+                                       /* move parameter to BPF_REG_0 */
+                                       emit_ia32_mov_r64(true, bpf2ia32[BPF_REG_0],
+                                                         bpf2ia32[BPF_REG_1], true, true,
+                                                         &prog, bpf_prog->aux);
+                                       /* multiply parameter by mult */
+                                       emit_ia32_mul_i64(bpf2ia32[BPF_REG_0],
+                                                         mult, true, &prog);

How did you test this?
It's far from obvious that this will match what mul_u64_u32_shr() does.
And on a quick look, I really doubt it does.

Well, I can re-write it op-by-op from mul_u64_u32_shr(), but it's more
or less the same given that mult and shift are not too big, which is
common for TSC coefficients.
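
For comparison, the generic fallback of mul_u64_u32_shr() in
include/linux/math64.h has roughly this shape (a sketch from memory,
check the tree for the exact version); the point is that it keeps the
high part of the 96-bit product, which a plain 64x32 multiply + shift
drops:

	static inline u64 mul_u64_u32_shr_sketch(u64 a, u32 mul, unsigned int shift)
	{
		u32 ah = a >> 32, al = a;
		u64 ret;

		ret = ((u64)al * mul) >> shift;
		if (ah)
			ret += ((u64)ah * mul) << (32 - shift);
		return ret;
	}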


The trouble of adding support for 32-bit JIT doesn't seem worth it.

Do you mean it's better to drop this JIT implementation?


+                                       /* shift parameter by shift which is less than 64 */
+                                       emit_ia32_rsh_i64(bpf2ia32[BPF_REG_0],
+                                                         shift, true, &prog);
+                               }

                                 err = emit_kfunc_call(bpf_prog,
                                                       image + addrs[i],
@@ -2648,7 +2672,8 @@ bool bpf_jit_inlines_kfunc_call(s32 imm)
  {
         if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
                 return false;
-       if (imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) &&
+       if ((imm == BPF_CALL_IMM(bpf_get_cpu_time_counter) ||
+           imm == BPF_CALL_IMM(bpf_cpu_time_counter_to_ns)) &&
             cpu_feature_enabled(X86_FEATURE_TSC) &&
             using_native_sched_clock() && sched_clock_stable())
                 return true;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a5e9b592d3e8..f45a704f06e3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3389,6 +3389,7 @@ u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

  /* Inlined kfuncs */
  u64 bpf_get_cpu_time_counter(void);
+u64 bpf_cpu_time_counter_to_ns(u64 counter);

  #if defined(CONFIG_NET)
  bool bpf_sock_common_is_valid_access(int off, int size,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 43bf35a15f78..e5ed5ba4b4aa 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3198,6 +3198,11 @@ __bpf_kfunc u64 bpf_get_cpu_time_counter(void)
         return ktime_get_raw_fast_ns();
  }

+__bpf_kfunc u64 bpf_cpu_time_counter_to_ns(u64 counter)
+{
+       return counter;
+}
+
  __bpf_kfunc_end_defs();

  BTF_KFUNCS_START(generic_btf_ids)
@@ -3299,6 +3304,7 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
  BTF_ID_FLAGS(func, bpf_local_irq_save)
  BTF_ID_FLAGS(func, bpf_local_irq_restore)
  BTF_ID_FLAGS(func, bpf_get_cpu_time_counter, KF_FASTCALL)
+BTF_ID_FLAGS(func, bpf_cpu_time_counter_to_ns, KF_FASTCALL)
  BTF_KFUNCS_END(common_btf_ids)

  static const struct btf_kfunc_id_set common_kfunc_set = {
--
2.47.1
