On Sat, Jan 25, 2025 at 11:08 AM Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote: > > On 1/20/25 5:29 PM, Jason Xing wrote: > > Only check if we pass those three key points after we enable the > > bpf extension for so_timestamping. During each point, we can choose > > whether to print the current timestamp. > > > > Signed-off-by: Jason Xing <kerneljasonxing@xxxxxxxxx> > > --- > > .../bpf/prog_tests/so_timestamping.c | 98 ++++++++ > > .../selftests/bpf/progs/so_timestamping.c | 227 ++++++++++++++++++ > > 2 files changed, 325 insertions(+) > > create mode 100644 tools/testing/selftests/bpf/prog_tests/so_timestamping.c > > create mode 100644 tools/testing/selftests/bpf/progs/so_timestamping.c > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/so_timestamping.c b/tools/testing/selftests/bpf/prog_tests/so_timestamping.c > > new file mode 100644 > > index 000000000000..bbfa7eb38cfb > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/prog_tests/so_timestamping.c > > @@ -0,0 +1,98 @@ > > +#define _GNU_SOURCE > > +#include <sched.h> > > +#include <linux/socket.h> > > +#include <linux/tls.h> > > +#include <net/if.h> > > + > > +#include "test_progs.h" > > +#include "cgroup_helpers.h" > > +#include "network_helpers.h" > > + > > +#include "so_timestamping.skel.h" > > + > > +#define CG_NAME "/so-timestamping-test" > > + > > +static const char addr4_str[] = "127.0.0.1"; > > +static const char addr6_str[] = "::1"; > > +static struct so_timestamping *skel; > > +static int cg_fd; > > + > > +static int create_netns(void) > > Reuse netns_new("so_timestamping_ns", true) from test_progs.c instead.
> > > +{ > > + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) > > + return -1; > > + > > + if (!ASSERT_OK(system("ip link set dev lo up"), "set lo up")) > > + return -1; > > + > > + return 0; > > +} > > + > > +static void test_tcp(int family) > > +{ > > + struct so_timestamping__bss *bss = skel->bss; > > + char buf[] = "testing testing"; > > + int sfd = -1, cfd = -1; > > + int n; > > + > > + memset(bss, 0, sizeof(*bss)); > > + > > + sfd = start_server(family, SOCK_STREAM, > > + family == AF_INET6 ? addr6_str : addr4_str, 0, 0); > > + if (!ASSERT_GE(sfd, 0, "start_server")) > > nit: use ASSERT_OK_FD. > > > + goto out; > > + > > + cfd = connect_to_fd(sfd, 0); > > + if (!ASSERT_GE(cfd, 0, "connect_to_fd_server")) { > > Same here: use ASSERT_OK_FD. > > > + close(sfd); > > This close is unnecessary. It would also cause a double close at "out:". > > > + goto out; > > + } > > + > > + n = write(cfd, buf, sizeof(buf)); > > + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) > > + goto out; > > + > > + ASSERT_EQ(bss->nr_active, 1, "nr_active"); > > + ASSERT_EQ(bss->nr_snd, 2, "nr_snd"); > > + ASSERT_EQ(bss->nr_sched, 1, "nr_sched"); > > + ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw"); > > + ASSERT_EQ(bss->nr_ack, 1, "nr_ack"); > > + > > +out: > > + if (sfd >= 0) > > + close(sfd); > > + if (cfd >= 0) > > + close(cfd); > > +} > > + > > +void test_so_timestamping(void) > > +{ > > + cg_fd = test__join_cgroup(CG_NAME); > > + if (cg_fd < 0) > > + return; > > + > > + if (create_netns()) > > + goto done; > > + > > + skel = so_timestamping__open(); > > nit: use so_timestamping__open_and_load(). > > > + if (!ASSERT_OK_PTR(skel, "open skel")) > > + goto done; > > + > > + if (!ASSERT_OK(so_timestamping__load(skel), "load skel")) > > Then this __load() is not needed.
> > > + goto done; > > + > > + if (!ASSERT_OK(so_timestamping__attach(skel), "attach skel")) > > + goto done; > > + > > + skel->links.skops_sockopt = > > + bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd); > > + if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup")) > > + goto done; > > + > > + test_tcp(AF_INET6); > > + test_tcp(AF_INET); > > + > > +done: > > + so_timestamping__destroy(skel); > > + close(cg_fd); > > +} > > diff --git a/tools/testing/selftests/bpf/progs/so_timestamping.c b/tools/testing/selftests/bpf/progs/so_timestamping.c > > new file mode 100644 > > index 000000000000..f4708e84c243 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/progs/so_timestamping.c > > @@ -0,0 +1,227 @@ > > +#include "vmlinux.h" > > +#include "bpf_tracing_net.h" > > +#include <bpf/bpf_core_read.h> > > +#include <bpf/bpf_helpers.h> > > +#include <bpf/bpf_tracing.h> > > +//#include <bpf/bpf_core_read.h> > > +#include "bpf_misc.h" > > +#include "bpf_kfuncs.h" > > + > > +#define SK_BPF_CB_FLAGS 1009 > > +#define SK_BPF_CB_TX_TIMESTAMPING 1 > > + > > +int nr_active; > > +int nr_snd; > > +int nr_passive; > > +int nr_sched; > > +int nr_txsw; > > +int nr_ack; > > + > > +struct sockopt_test { > > + int opt; > > + int new; > > +}; > > + > > +static const struct sockopt_test sol_socket_tests[] = { > > + { .opt = SK_BPF_CB_FLAGS, .new = SK_BPF_CB_TX_TIMESTAMPING, }, > > + { .opt = 0, }, > > +}; > > + > > +struct loop_ctx { > > + void *ctx; > > + const struct sock *sk; > > +}; > > + > > +struct sk_stg { > > + __u64 sendmsg_ns; /* record ts when sendmsg is called */ > > +}; > > + > > +struct { > > + __uint(type, BPF_MAP_TYPE_SK_STORAGE); > > + __uint(map_flags, BPF_F_NO_PREALLOC); > > + __type(key, int); > > + __type(value, struct sk_stg); > > +} sk_stg_map SEC(".maps"); > > + > > + > > +struct delay_info { > > + u64 sendmsg_ns; /* record ts when sendmsg is called */ > > + u32 sched_delay; /* SCHED_OPT_CB - sendmsg_ns */ > > + u32 sw_snd_delay; /* SW_OPT_CB - 
SCHED_OPT_CB */ > > + u32 ack_delay; /* ACK_OPT_CB - SW_OPT_CB */ > > +}; > > + > > +struct { > > + __uint(type, BPF_MAP_TYPE_HASH); > > + __type(key, u32); > > + __type(value, struct delay_info); > > + __uint(max_entries, 1024); > > +} time_map SEC(".maps"); > > + > > +static u64 delay_tolerance_nsec = 1000000000; /* 1 second as an example */ > > + > > +static int bpf_test_sockopt_int(void *ctx, const struct sock *sk, > > + const struct sockopt_test *t, > > + int level) > > +{ > > + int new, opt, tmp; > > + > > + opt = t->opt; > > + new = t->new; > > + > > + if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new))) > > + return 1; > > + > > + if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) || > > + tmp != new) { > > + return 1; > > + } > > + > > + return 0; > > +} > > + > > +static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc) > > +{ > > + const struct sockopt_test *t; > > + > > + if (i >= ARRAY_SIZE(sol_socket_tests)) > > + return 1; > > + > > + t = &sol_socket_tests[i]; > > + if (!t->opt) > > + return 1; > > + > > + return bpf_test_sockopt_int(lc->ctx, lc->sk, t, SOL_SOCKET); > > +} > > + > > +static int bpf_test_sockopt(void *ctx, const struct sock *sk) > > +{ > > + struct loop_ctx lc = { .ctx = ctx, .sk = sk, }; > > + int n; > > + > > + n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0); > > + if (n != ARRAY_SIZE(sol_socket_tests)) > > + return -1; > > + > > + return 0; > > +} > > + > > +static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk) > > +{ > > + struct bpf_sock_ops_kern *skops_kern; > > + u64 timestamp = bpf_ktime_get_ns(); > > + struct skb_shared_info *shinfo; > > + struct delay_info dinfo = {0}; > > + struct delay_info *val; > > + struct sk_buff *skb; > > + struct sk_stg *stg; > > + u32 delay, tskey; > > + u64 prior_ts; > > + > > + skops_kern = bpf_cast_to_kern_ctx(skops); > > + skb = skops_kern->skb; > > + shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info); > > + 
tskey = shinfo->tskey; > > + if (!tskey) > > + return false; > > + > > + if (skops->op == BPF_SOCK_OPS_TS_TCP_SND_CB) { > > + stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0); > > + if (!stg) > > + return false; > > + dinfo.sendmsg_ns = stg->sendmsg_ns; > > + val = &dinfo; > > Move the map_update here instead. > > bpf_map_update_elem(&time_map, &tskey, val, BPF_ANY); > > > + goto out; > > + } > > + > > + val = bpf_map_lookup_elem(&time_map, &tskey); > > + if (!val) > > + return false; > > + > > + switch (skops->op) { > > + case BPF_SOCK_OPS_TS_SCHED_OPT_CB: > > + delay = val->sched_delay = timestamp - val->sendmsg_ns; > > + break; > > + case BPF_SOCK_OPS_TS_SW_OPT_CB: > > + prior_ts = val->sched_delay + val->sendmsg_ns; > > + delay = val->sw_snd_delay = timestamp - prior_ts; > > + break; > > + case BPF_SOCK_OPS_TS_ACK_OPT_CB: > > + prior_ts = val->sw_snd_delay + val->sched_delay + val->sendmsg_ns; > > + delay = val->ack_delay = timestamp - prior_ts; > > + break; > > + } > > + > > + if (delay <= 0 || delay >= delay_tolerance_nsec) > > Regarding the delay <= 0 check, note that delay is defined as u32, so it can never be negative. > > delay_tolerance_nsec is 1 sec, which could be too short for the bpf CI. Maybe > raise it to something like 10s and only check "if (delay >= delay_tolerance_nsec)". It > would also be useful to bump a nr_long_delay counter and ASSERT on it in userspace. > > btw, the delay is in nsec; is u32 enough (a u32 wraps after about 4.29 seconds)? > > > > + return false; > > + > > + /* Since it's the last one, remove from the map after latency check */ > > + if (skops->op == BPF_SOCK_OPS_TS_ACK_OPT_CB) { > > + bpf_map_delete_elem(&time_map, &tskey); > > + return true; > > + } > > + > > +out: > > + bpf_map_update_elem(&time_map, &tskey, val, BPF_ANY); > > Then there is no need to do update_elem here for the other ops. I'm going to address all the points that you mentioned above. Thanks! > > Overall, I think the set looks good. Only a few things left. Thanks for > revamping the test also. The test should be pretty close to how it will be used.
> > Please add tests to ensure the new timestamping callbacks cannot use the helpers > that we discussed in the earlier patch and also cannot directly read/write the > sock fields through the bpf_sock_ops. No problem. Will do it in the next respin. > > Please also add some details on what the UDP BPF_SOCK_OPS_TS_TCP_SND_CB (or to be > renamed to BPF_SOCK_OPS_TS_SND_CB?) will look like. It is the only callback > that I don't have a clear idea about for UDP. I think I will rename it as you said. But I wonder if I can add more details about UDP after this series gets merged, which should not be too late. After this series, I will carefully consider and test how we use it for the UDP type. > > Please tag the set to bpf-next. Then the bpf CI can pick it up automatically and > continue testing it whenever some other bpf patches land. Got it! > > [ I will reply to the other follow-ups later ] Thanks for your work. > > > + return true; > > +} > > + > > +SEC("fentry/tcp_sendmsg_locked") > > +int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, size_t size) > > +{ > > + u64 timestamp = bpf_ktime_get_ns(); > > + u32 flag = sk->sk_bpf_cb_flags; > > + struct sk_stg *stg; > > + > > + if (!flag) > > + return 0; > > + > > + stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, > > + BPF_SK_STORAGE_GET_F_CREATE); > > + if (!stg) > > + return 0; > > + > > + stg->sendmsg_ns = timestamp; > > + nr_snd += 1; > > + return 0; > > +} > > + > > +SEC("sockops") > > +int skops_sockopt(struct bpf_sock_ops *skops) > > +{ > > + struct bpf_sock *bpf_sk = skops->sk; > > + const struct sock *sk; > > + > > + if (!bpf_sk) > > + return 1; > > + > > + sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk); > > + if (!sk) > > + return 1; > > + > > + switch (skops->op) { > > + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: > > + nr_active += !bpf_test_sockopt(skops, sk); > > + break; > > + case BPF_SOCK_OPS_TS_TCP_SND_CB: > > + if (bpf_test_delay(skops, sk)) > > + nr_snd += 1; > > + break; > > + case
BPF_SOCK_OPS_TS_SCHED_OPT_CB: > > + if (bpf_test_delay(skops, sk)) > > + nr_sched += 1; > > + break; > > + case BPF_SOCK_OPS_TS_SW_OPT_CB: > > + if (bpf_test_delay(skops, sk)) > > + nr_txsw += 1; > > + break; > > + case BPF_SOCK_OPS_TS_ACK_OPT_CB: > > + if (bpf_test_delay(skops, sk)) > > + nr_ack += 1; > > + break; > > + } > > + > > + return 1; > > +} > > + > > +char _license[] SEC("license") = "GPL"; >