Re: [RFC bpf-next v2 09/11] selftests/bpf: Extend xdp_metadata with devtx kfuncs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 21/06/2023 19.02, Stanislav Fomichev wrote:
Attach kfuncs that request and report TX timestamp via ringbuf.
Confirm on the userspace side that the program has triggered
and the timestamp is non-zero.

Also make sure devtx_frame has sensible pointers and data.

[...]


diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index d151d406a123..fc025183d45a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
[...]
@@ -19,10 +24,25 @@ struct {
  	__type(value, __u32);
  } prog_arr SEC(".maps");
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 10);
+} tx_compl_buf SEC(".maps");
+
+__u64 pkts_fail_tx = 0;
+
+int ifindex = -1;
+__u64 net_cookie = -1;
+
  extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
  					 __u64 *timestamp) __ksym;
  extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
  				    enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_devtx_sb_request_timestamp(const struct devtx_frame *ctx) __ksym;
+extern int bpf_devtx_cp_timestamp(const struct devtx_frame *ctx, __u64 *timestamp) __ksym;
+
+extern int bpf_devtx_sb_attach(int ifindex, int prog_fd) __ksym;
+extern int bpf_devtx_cp_attach(int ifindex, int prog_fd) __ksym;
SEC("xdp")
  int rx(struct xdp_md *ctx)
@@ -61,4 +81,102 @@ int rx(struct xdp_md *ctx)
  	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
  }
+static inline int verify_frame(const struct devtx_frame *frame)
+{
+	struct ethhdr eth = {};
+
+	/* all the pointers are set up correctly */
+	if (!frame->data)
+		return -1;
+	if (!frame->sinfo)
+		return -1;
+
+	/* can get to the frags */
+	if (frame->sinfo->nr_frags != 0)
+		return -1;
+	if (frame->sinfo->frags[0].bv_page != 0)
+		return -1;
+	if (frame->sinfo->frags[0].bv_len != 0)
+		return -1;
+	if (frame->sinfo->frags[0].bv_offset != 0)
+		return -1;
+
+	/* the data has something that looks like ethernet */
+	if (frame->len != 46)
+		return -1;
+	bpf_probe_read_kernel(&eth, sizeof(eth), frame->data);
+
+	if (eth.h_proto != bpf_htons(ETH_P_IP))
+		return -1;
+
+	return 0;
+}
+
+SEC("fentry/veth_devtx_submit")
+int BPF_PROG(tx_submit, const struct devtx_frame *frame)
+{
+	struct xdp_tx_meta meta = {};
+	int ret;
+
+	if (frame->netdev->ifindex != ifindex)
+		return 0;
+	if (frame->netdev->nd_net.net->net_cookie != net_cookie)
+		return 0;
+	if (frame->meta_len != TX_META_LEN)
+		return 0;
+
+	bpf_probe_read_kernel(&meta, sizeof(meta), frame->data - TX_META_LEN);
+	if (!meta.request_timestamp)
+		return 0;
+
+	ret = verify_frame(frame);
+	if (ret < 0) {
+		__sync_add_and_fetch(&pkts_fail_tx, 1);
+		return 0;
+	}
+
+	ret = bpf_devtx_sb_request_timestamp(frame);

My original design thoughts were that BPF-progs would write into the
metadata area, with the intent that at TX-complete we can access this
metadata area again.

In this case, with request_timestamp, it would make sense to me to store
a sequence number (+ the TX-queue number), such that program code can
correlate on the complete event.

Like the xdp_hw_metadata example, I would likely also add a software timestamp, which I could check at the TX-complete hook.

+	if (ret < 0) {
+		__sync_add_and_fetch(&pkts_fail_tx, 1);
+		return 0;
+	}
+
+	return 0;
+}
+
+SEC("fentry/veth_devtx_complete")
+int BPF_PROG(tx_complete, const struct devtx_frame *frame)
+{
+	struct xdp_tx_meta meta = {};
+	struct devtx_sample *sample;
+	int ret;
+
+	if (frame->netdev->ifindex != ifindex)
+		return 0;
+	if (frame->netdev->nd_net.net->net_cookie != net_cookie)
+		return 0;
+	if (frame->meta_len != TX_META_LEN)
+		return 0;
+
+	bpf_probe_read_kernel(&meta, sizeof(meta), frame->data - TX_META_LEN);
+	if (!meta.request_timestamp)
+		return 0;
+
+	ret = verify_frame(frame);
+	if (ret < 0) {
+		__sync_add_and_fetch(&pkts_fail_tx, 1);
+		return 0;
+	}
+
+	sample = bpf_ringbuf_reserve(&tx_compl_buf, sizeof(*sample), 0);
+	if (!sample)
+		return 0;

Sending this via a ringbuffer to userspace will make it hard to
correlate. (For AF_XDP it would help a little to add the TX-queue
number, as this hook isn't queue-bound but AF_XDP is.)

+
+	sample->timestamp_retval = bpf_devtx_cp_timestamp(frame, &sample->timestamp);
+

I was expecting to see information being written into the metadata area of the frame, such that AF_XDP completion-queue handling can extract the obtained timestamp.


+	bpf_ringbuf_submit(sample, 0);
+
+	return 0;
+}
+
  char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
index 938a729bd307..e410f2b95e64 100644
--- a/tools/testing/selftests/bpf/xdp_metadata.h
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -18,3 +18,17 @@ struct xdp_meta {
  		__s32 rx_hash_err;
  	};
  };
+
+struct devtx_sample {
+	int timestamp_retval;
+	__u64 timestamp;
+};
+
+#define TX_META_LEN	8

Very static design.

+
+struct xdp_tx_meta {
+	__u8 request_timestamp;
+	__u8 padding0;
+	__u16 padding1;
+	__u32 padding2;
+};

padding2 could be a btf_id for creating a more flexible design.

--Jesper




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux