On 21/06/2023 19.02, Stanislav Fomichev wrote:
Attach kfuncs that request and report TX timestamp via ringbuf.
Confirm on the userspace side that the program has triggered
and the timestamp is non-zero.
Also make sure devtx_frame has a sensible pointers and data.
[...]
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index d151d406a123..fc025183d45a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
[...]
@@ -19,10 +24,25 @@ struct {
__type(value, __u32);
} prog_arr SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 10);
+} tx_compl_buf SEC(".maps");
+
+__u64 pkts_fail_tx = 0;
+
+int ifindex = -1;
+__u64 net_cookie = -1;
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_devtx_sb_request_timestamp(const struct devtx_frame *ctx) __ksym;
+extern int bpf_devtx_cp_timestamp(const struct devtx_frame *ctx, __u64 *timestamp) __ksym;
+
+extern int bpf_devtx_sb_attach(int ifindex, int prog_fd) __ksym;
+extern int bpf_devtx_cp_attach(int ifindex, int prog_fd) __ksym;
SEC("xdp")
int rx(struct xdp_md *ctx)
@@ -61,4 +81,102 @@ int rx(struct xdp_md *ctx)
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
+static inline int verify_frame(const struct devtx_frame *frame)
+{
+ struct ethhdr eth = {};
+
+ /* all the pointers are set up correctly */
+ if (!frame->data)
+ return -1;
+ if (!frame->sinfo)
+ return -1;
+
+ /* can get to the frags */
+ if (frame->sinfo->nr_frags != 0)
+ return -1;
+ if (frame->sinfo->frags[0].bv_page != 0)
+ return -1;
+ if (frame->sinfo->frags[0].bv_len != 0)
+ return -1;
+ if (frame->sinfo->frags[0].bv_offset != 0)
+ return -1;
+
+ /* the data has something that looks like ethernet */
+ if (frame->len != 46)
+ return -1;
+ bpf_probe_read_kernel(ð, sizeof(eth), frame->data);
+
+ if (eth.h_proto != bpf_htons(ETH_P_IP))
+ return -1;
+
+ return 0;
+}
+
+SEC("fentry/veth_devtx_submit")
+int BPF_PROG(tx_submit, const struct devtx_frame *frame)
+{
+ struct xdp_tx_meta meta = {};
+ int ret;
+
+ if (frame->netdev->ifindex != ifindex)
+ return 0;
+ if (frame->netdev->nd_net.net->net_cookie != net_cookie)
+ return 0;
+ if (frame->meta_len != TX_META_LEN)
+ return 0;
+
+ bpf_probe_read_kernel(&meta, sizeof(meta), frame->data - TX_META_LEN);
+ if (!meta.request_timestamp)
+ return 0;
+
+ ret = verify_frame(frame);
+ if (ret < 0) {
+ __sync_add_and_fetch(&pkts_fail_tx, 1);
+ return 0;
+ }
+
+ ret = bpf_devtx_sb_request_timestamp(frame);
My original design thoughts were that BPF-progs would write into
metadata area, with the intend that at TX-complete we can access this
metadata area again.
In this case with request_timestamp it would make sense to me, to store
a sequence number (+ the TX-queue number), such that program code can
correlate on complete event.
Like xdp_hw_metadata example, I would likely also to add a software
timestamp, what I could check at TX complete hook.
+ if (ret < 0) {
+ __sync_add_and_fetch(&pkts_fail_tx, 1);
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("fentry/veth_devtx_complete")
+int BPF_PROG(tx_complete, const struct devtx_frame *frame)
+{
+ struct xdp_tx_meta meta = {};
+ struct devtx_sample *sample;
+ int ret;
+
+ if (frame->netdev->ifindex != ifindex)
+ return 0;
+ if (frame->netdev->nd_net.net->net_cookie != net_cookie)
+ return 0;
+ if (frame->meta_len != TX_META_LEN)
+ return 0;
+
+ bpf_probe_read_kernel(&meta, sizeof(meta), frame->data - TX_META_LEN);
+ if (!meta.request_timestamp)
+ return 0;
+
+ ret = verify_frame(frame);
+ if (ret < 0) {
+ __sync_add_and_fetch(&pkts_fail_tx, 1);
+ return 0;
+ }
+
+ sample = bpf_ringbuf_reserve(&tx_compl_buf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
Sending this via a ringbuffer to userspace, will make it hard to
correlate. (For AF_XDP it would help a little to add the TX-queue
number, as this hook isn't queue bound but AF_XDP is).
+
+ sample->timestamp_retval = bpf_devtx_cp_timestamp(frame, &sample->timestamp);
+
I were expecting to see, information being written into the metadata
area of the frame, such that AF_XDP completion-queue handling can
extract this obtained timestamp.
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
index 938a729bd307..e410f2b95e64 100644
--- a/tools/testing/selftests/bpf/xdp_metadata.h
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -18,3 +18,17 @@ struct xdp_meta {
__s32 rx_hash_err;
};
};
+
+struct devtx_sample {
+ int timestamp_retval;
+ __u64 timestamp;
+};
+
+#define TX_META_LEN 8
Very static design.
+
+struct xdp_tx_meta {
+ __u8 request_timestamp;
+ __u8 padding0;
+ __u16 padding1;
+ __u32 padding2;
+};
padding2 could be a btf_id for creating a more flexible design.
--Jesper