On Thu, Feb 13, 2025 at 9:08 AM Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote: > > On 2/11/25 10:18 PM, Jason Xing wrote: > > BPF program calculates a couple of latency deltas between each tx > > timestamping callbacks. It can be used in the real world to diagnose > > the kernel behaviour in the tx path. > > > > Check the safety issues by accessing a few bpf calls in > > bpf_test_access_bpf_calls() which are implemented in the patch 3 and 4. > > > > Check if the bpf timestamping can co-exist with socket timestamping. > > > > There remains a few realistic things[1][2] to highlight: > > 1. in general a packet may pass through multiple qdiscs. For instance > > with bonding or tunnel virtual devices in the egress path. > > 2. packets may be resent, in which case an ACK might precede a repeat > > SCHED and SND. > > 3. erroneous or malicious peers may also just never send an ACK. > > > > [1]: https://lore.kernel.org/all/67a389af981b0_14e0832949d@xxxxxxxxxxxxxxxxxxxxxx.notmuch/ > > [2]: https://lore.kernel.org/all/c329a0c1-239b-4ca1-91f2-cb30b8dd2f6a@xxxxxxxxx/ > > > > Signed-off-by: Jason Xing <kerneljasonxing@xxxxxxxxx> > > --- > > .../bpf/prog_tests/net_timestamping.c | 231 +++++++++++++++++ > > .../selftests/bpf/progs/net_timestamping.c | 244 ++++++++++++++++++ > > 2 files changed, 475 insertions(+) > > create mode 100644 tools/testing/selftests/bpf/prog_tests/net_timestamping.c > > create mode 100644 tools/testing/selftests/bpf/progs/net_timestamping.c > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c > > new file mode 100644 > > index 000000000000..dcdc40473a7d > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c > > @@ -0,0 +1,231 @@ > > +#include <linux/net_tstamp.h> > > +#include <sys/time.h> > > +#include <linux/errqueue.h> > > +#include "test_progs.h" > > +#include "network_helpers.h" > > +#include "net_timestamping.skel.h" > > + > > +#define CG_NAME "/net-timestamping-test" > > +#define NSEC_PER_SEC 1000000000LL > > + > > +static const char addr4_str[] = "127.0.0.1"; > > +static const char addr6_str[] = "::1"; > > +static struct net_timestamping *skel; > > +static int cfg_payload_len = 30; > > const ? Will add it. > > > +static struct timespec usr_ts; > > +static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */ > > +int SK_TS_SCHED; > > +int SK_TS_TXSW; > > +int SK_TS_ACK; > > + > > +static int64_t timespec_to_ns64(struct timespec *ts) > > +{ > > + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; > > +} > > + > > +static void validate_key(int tskey, int tstype) > > +{ > > + static int expected_tskey = -1; > > + > > + if (tstype == SCM_TSTAMP_SCHED) > > + expected_tskey = cfg_payload_len - 1; > > + > > + ASSERT_EQ(expected_tskey, tskey, "tskey mismatch"); > > + > > + expected_tskey = tskey; > > +} > > + > > +static void validate_timestamp(struct timespec *cur, struct timespec *prev) > > +{ > > + int64_t cur_ns, prev_ns; > > + > > + cur_ns = timespec_to_ns64(cur); > > + prev_ns = timespec_to_ns64(prev); > > + > > + ASSERT_TRUE((cur_ns - prev_ns) < delay_tolerance_nsec, "latency"); > > ASSERT_LT() Got it! > > > +} > > + > > +static void test_socket_timestamp(struct scm_timestamping *tss, int tstype, > > + int tskey) > > +{ > > + static struct timespec *prev_ts = &usr_ts; > > + > > + validate_key(tskey, tstype); > > + > > + switch (tstype) { > > + case SCM_TSTAMP_SCHED: > > + validate_timestamp(&tss->ts[0], prev_ts); > > + SK_TS_SCHED = 1; > > + SK_TS_TXSW = SK_TS_ACK = 0; > > + break; > > + case SCM_TSTAMP_SND: > > + validate_timestamp(&tss->ts[0], prev_ts); > > + SK_TS_TXSW = 1; > > + break; > > + case SCM_TSTAMP_ACK: > > + validate_timestamp(&tss->ts[0], prev_ts); > > + SK_TS_ACK = 1; > > + break; > > + } > > + > > + prev_ts = &tss->ts[0]; > > +} > > + > > +static void test_recv_errmsg_cmsg(struct msghdr *msg) > > +{ > > + struct sock_extended_err *serr = NULL; > > + struct scm_timestamping *tss = NULL; > > + struct cmsghdr *cm; > > + > > + for (cm = CMSG_FIRSTHDR(msg); > > + cm && cm->cmsg_len; > > + cm = CMSG_NXTHDR(msg, cm)) { > > + if (cm->cmsg_level == SOL_SOCKET && > > + cm->cmsg_type == SCM_TIMESTAMPING) { > > + tss = (void *) CMSG_DATA(cm); > > + } else if ((cm->cmsg_level == SOL_IP && > > + cm->cmsg_type == IP_RECVERR) || > > + (cm->cmsg_level == SOL_IPV6 && > > + cm->cmsg_type == IPV6_RECVERR) || > > + (cm->cmsg_level == SOL_PACKET && > > + cm->cmsg_type == PACKET_TX_TIMESTAMP)) { > > + serr = (void *) CMSG_DATA(cm); > > + ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING, > > + "cmsg type"); > > + } > > + > > + if (serr && tss) > > + test_socket_timestamp(tss, serr->ee_info, > > + serr->ee_data); > > + } > > +} > > + > > +static bool socket_recv_errmsg(int fd) > > +{ > > + static char ctrl[1024 /* overprovision*/]; > > + char data[cfg_payload_len]; > > + static struct msghdr msg; > > + struct iovec entry; > > + int n = 0; > > + > > + memset(&msg, 0, sizeof(msg)); > > + memset(&entry, 0, sizeof(entry)); > > + memset(ctrl, 0, sizeof(ctrl)); > > + > > + entry.iov_base = data; > > + entry.iov_len = cfg_payload_len; > > + msg.msg_iov = &entry; > > + msg.msg_iovlen = 1; > > + msg.msg_name = NULL; > > + msg.msg_namelen = 0; > > + msg.msg_control = ctrl; > > + msg.msg_controllen = sizeof(ctrl); > > + > > + n = recvmsg(fd, &msg, MSG_ERRQUEUE); > > + if (n == -1) > > + ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE"); > > + > > + if (n >= 0) > > + test_recv_errmsg_cmsg(&msg); > > + > > + return n == -1; > > + > > +} > > + > > +static void test_socket_timestamping(int fd) > > +{ > > + while (!socket_recv_errmsg(fd)); > > + > > + ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED"); > > + ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND"); > > + ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK"); > > +} > > + > > +static void test_tcp(int family) > > +{ > > + struct net_timestamping__bss *bss = skel->bss; > > + char buf[cfg_payload_len]; > > + int sfd = -1, cfd = -1; > > + unsigned int sock_opt; > > + int ret; > > + > > + memset(bss, 0, sizeof(*bss)); > > No need to reset some of the new global variables, e.g. SK_TS_SCHED? I thought I had handled it well, but the fact is .... Will fix it. > > > + > > + sfd = start_server(family, SOCK_STREAM, > > + family == AF_INET6 ? addr6_str : addr4_str, 0, 0); > > + if (!ASSERT_OK_FD(sfd, "start_server")) > > + goto out; > > + > > + cfd = connect_to_fd(sfd, 0); > > + if (!ASSERT_OK_FD(cfd, "connect_to_fd_server")) > > + goto out; > > + > > + sock_opt = SOF_TIMESTAMPING_SOFTWARE | > > + SOF_TIMESTAMPING_OPT_ID | > > + SOF_TIMESTAMPING_TX_SCHED | > > + SOF_TIMESTAMPING_TX_SOFTWARE | > > + SOF_TIMESTAMPING_TX_ACK; > > + ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING, > > + (char *) &sock_opt, sizeof(sock_opt)); > > It also needs the original test in v9 to check the bpf timestamping works > without the user space's SO_TIMESTAMPING, which is the major use case of this > series. > > It should be easy to do by conditionally enabling the SO_TIMESTAMPING here. Agreed. > > > + if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING")) > > + goto out; > > + > > + ret = clock_gettime(CLOCK_REALTIME, &usr_ts); > > + if (!ASSERT_OK(ret, "get user time")) > > + goto out; > > + > > + ret = write(cfd, buf, sizeof(buf)); > > + if (!ASSERT_EQ(ret, sizeof(buf), "send to server")) > > + goto out; > > + > > + /* Test if socket timestamping works correctly even with bpf > > + * extension enabled. > > + */ > > + test_socket_timestamping(cfd); > > + > > + ASSERT_EQ(bss->nr_active, 1, "nr_active"); > > + ASSERT_EQ(bss->nr_snd, 2, "nr_snd"); > > + ASSERT_EQ(bss->nr_sched, 1, "nr_sched"); > > + ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw"); > > + ASSERT_EQ(bss->nr_ack, 1, "nr_ack"); > > + > > +out: > > + if (sfd >= 0) > > + close(sfd); > > + if (cfd >= 0) > > + close(cfd); > > +} > > + > > +void test_net_timestamping(void) > > +{ > > + struct netns_obj *ns; > > + int cg_fd; > > + > > + cg_fd = test__join_cgroup(CG_NAME); > > + if (!ASSERT_OK_FD(cg_fd, "join cgroup")) > > + return; > > + > > + ns = netns_new("net_timestamping_ns", true); > > + if (!ASSERT_OK_PTR(ns, "create ns")) > > + goto done; > > + > > + skel = net_timestamping__open_and_load(); > > + if (!ASSERT_OK_PTR(skel, "open and load skel")) > > + goto done; > > + > > + if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel")) > > + goto done; > > + > > + skel->links.skops_sockopt = > > + bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd); > > + if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup")) > > + goto done; > > + > > + test_tcp(AF_INET6); > > + test_tcp(AF_INET); > > Considering the w and w/o SO_TIMESTAMPING combinations (i.e. x2), it is worth to > have proper subtests. It is easy also. Take a look at the test__start_subtest() > usage `under the prog_tests/. Will use it :) > > > + > > +done: > > + net_timestamping__destroy(skel); > > + netns_free(ns); > > + close(cg_fd); > > +} > > diff --git a/tools/testing/selftests/bpf/progs/net_timestamping.c b/tools/testing/selftests/bpf/progs/net_timestamping.c > > new file mode 100644 > > index 000000000000..d3e1da599626 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/progs/net_timestamping.c > > @@ -0,0 +1,244 @@ > > +#include "vmlinux.h" > > +#include "bpf_tracing_net.h" > > +#include <bpf/bpf_helpers.h> > > +#include <bpf/bpf_tracing.h> > > +#include "bpf_misc.h" > > +#include "bpf_kfuncs.h" > > +#include <errno.h> > > + > > +#define SK_BPF_CB_FLAGS 1009 > > +#define SK_BPF_CB_TX_TIMESTAMPING 1 > > Remove these two defines. The vmlinux.h has it. Will do it. > > [ ... ] > > > +SEC("fentry/tcp_sendmsg_locked") > > +int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, size_t size) > > +{ > > + u64 timestamp = bpf_ktime_get_ns(); > > + u32 flag = sk->sk_bpf_cb_flags; > > + struct sk_stg *stg; > > + > > + if (!flag) > > I just noticed this one. > > Lets replace the "flag" check with a better check (e.g. pid check used in other > tests). Then it won't affect sk of other tests running in parallel. > > It is pretty easy. Take a look at how bpf_get_current_pid_tgid() is used in > progs/local_storage.c. Thanks, I would use "if (pid != monitored_pid || !flag)" to test. I've already made the changes as you suggested and it works. Thanks. I will do more rounds of tests. > > > > + return 0; > > + > > + stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, > > + BPF_SK_STORAGE_GET_F_CREATE); > > + if (!stg) > > + return 0; > > + > > + stg->sendmsg_ns = timestamp; > > + nr_snd += 1; > > + return 0; > > +} > > +