From: Tonghao Zhang <xiangxia.m.yue@xxxxxxxxx> This patch introduces a new bpf_ktime_get_real_ns helper, which may help us measure the skb latency in the ingress/forwarding path: HW/SW[1] -> ip_rcv/tcp_rcv_established -> tcp_recvmsg_locked/tcp_update_recv_tstamps * Insert a BPF kprobe into ip_rcv/tcp_rcv_established invoking this helper. Then we can inspect how much time has elapsed since the HW/SW timestamp. * If we insert a BPF kprobe into tcp_update_recv_tstamps, invoked by tcp_recvmsg, we can measure how long an skb stays in the tcp receive queue. The reason for this can be that the application fetches the TCP messages too late. [1]: - HW drivers may set skb_hwtstamps(skb)->hwtstamp - SW __netif_receive_skb_core set skb->tstamp with ktime_get_real() Cc: Alexei Starovoitov <ast@xxxxxxxxxx> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx> Cc: Andrii Nakryiko <andrii@xxxxxxxxxx> Cc: Martin KaFai Lau <kafai@xxxxxx> Cc: Song Liu <songliubraving@xxxxxx> Cc: Yonghong Song <yhs@xxxxxx> Cc: John Fastabend <john.fastabend@xxxxxxxxx> Cc: KP Singh <kpsingh@xxxxxxxxxx> Cc: Jiri Olsa <jolsa@xxxxxxxxxx> Cc: Dave Marchevsky <davemarchevsky@xxxxxx> Cc: Kuniyuki Iwashima <kuniyu@xxxxxxxxxxxx> Cc: Joanne Koong <joannekoong@xxxxxx> Cc: Geliang Tang <geliang.tang@xxxxxxxx> Cc: "David S. Miller" <davem@xxxxxxxxxxxxx> Cc: Jakub Kicinski <kuba@xxxxxxxxxx> Cc: Eric Dumazet <edumazet@xxxxxxxxxx> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@xxxxxxxxx> --- include/uapi/linux/bpf.h | 13 +++++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 14 ++++++++++++++ tools/include/uapi/linux/bpf.h | 13 +++++++++++++ 4 files changed, 41 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d14b10b85e51..2565c587fe1b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5143,6 +5143,18 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. 
+ * + * u64 bpf_ktime_get_real_ns(void) + * Description + * Return a fine-grained version of the real (i.e., wall-clock) time, + * in nanoseconds. This clock is affected by discontinuous jumps in + * the system time (e.g., if the system administrator manually changes + * the clock), and by the incremental adjustments performed by adjtime(3) + * and NTP. + * See: **clock_gettime**\ (**CLOCK_REALTIME**) + * Return + * Current *ktime*. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5351,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(ktime_get_real_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 13e9dbeeedf3..acdf538b1dcd 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2627,6 +2627,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +const struct bpf_func_proto bpf_ktime_get_real_ns_proto __weak; const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak; const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 315053ef6a75..d38548ed292f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -159,6 +159,18 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_ktime_get_real_ns) +{ + /* NMI safe access to clock realtime. 
*/ + return ktime_get_real_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_real_ns_proto = { + .func = bpf_ktime_get_real_ns, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_ktime_get_boot_ns) { /* NMI safe access to clock boottime */ @@ -1410,6 +1422,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; + case BPF_FUNC_ktime_get_real_ns: + return &bpf_ktime_get_real_ns_proto; case BPF_FUNC_ringbuf_output: return &bpf_ringbuf_output_proto; case BPF_FUNC_ringbuf_reserve: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d14b10b85e51..2565c587fe1b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5143,6 +5143,18 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * u64 bpf_ktime_get_real_ns(void) + * Description + * Return a fine-grained version of the real (i.e., wall-clock) time, + * in nanoseconds. This clock is affected by discontinuous jumps in + * the system time (e.g., if the system administrator manually changes + * the clock), and by the incremental adjustments performed by adjtime(3) + * and NTP. + * See: **clock_gettime**\ (**CLOCK_REALTIME**) + * Return + * Current *ktime*. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5351,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(ktime_get_real_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- 2.27.0