[1/2 bpf-next] bpf: expose net_device from xdp for metadata

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Allow XDP progs to read the net_device structure. It's useful to extract
info from the dev itself. Currently, our tracing tooling uses kprobes
to capture statistics and information about running net devices. We use
kprobes instead of other hooks (tc/xdp) because we need to collect
information about the interface that is not exposed through the xdp_md
structure. This has some downsides that we want to avoid by moving these
into the XDP hook itself. First, placing the kprobes in a generic function
in the kernel is after XDP, so we miss redirects and such done by the
XDP networking program. Second, it's needless overhead: we are
already paying the cost for calling the XDP program, so calling yet
another prog is a waste. It is better to do everything in one hook from
a performance standpoint.

Of course we could one-off each one of these fields, but that would
explode the xdp_md struct and then require writing convert_ctx_access
writers for each field. By using BTF we avoid writing field-specific
conversion logic: BTF just knows how to read the fields, we don't
have to add many fields to xdp_md, and I don't have to get every
field we will use in the future correct.

For reference, current examples in our code base use the ifindex,
ifname, qdisc stats, and net_ns fields, among others. With this
patch we can now do the following:

	dev = ctx->rx_dev;
	net = dev->nd_net.net;

	uid.ifindex = dev->ifindex;
	memcpy(uid.ifname, dev->ifname, NAME);
	if (net)
		uid.inum = net->ns.inum;

to report the name, index and ns.inum, which together identify an
interface in our system.

Signed-off-by: John Fastabend <john.fastabend@xxxxxxxxx>
---
 include/uapi/linux/bpf.h       |  1 +
 net/core/filter.c              | 19 +++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 3 files changed, 21 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 94659f6b3395..50403eb3b6cf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6123,6 +6123,7 @@ struct xdp_md {
 	__u32 rx_queue_index;  /* rxq->queue_index  */
 
 	__u32 egress_ifindex;  /* txq->dev->ifindex */
+	__bpf_md_ptr(struct net_device *, rx_dev); /* rxq->dev */
 };
 
 /* DEVMAP map-value layout
diff --git a/net/core/filter.c b/net/core/filter.c
index bb0136e7a8e4..d445ffbea8f1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8686,6 +8686,8 @@ static bool __is_valid_xdp_access(int off, int size)
 	return true;
 }
 
+BTF_ID_LIST_SINGLE(btf_xdp_get_netdev_id, struct, net_device)
+
 static bool xdp_is_valid_access(int off, int size,
 				enum bpf_access_type type,
 				const struct bpf_prog *prog,
@@ -8718,6 +8720,15 @@ static bool xdp_is_valid_access(int off, int size,
 	case offsetof(struct xdp_md, data_end):
 		info->reg_type = PTR_TO_PACKET_END;
 		break;
+	case offsetof(struct xdp_md, rx_dev):
+		info->reg_type = PTR_TO_BTF_ID;
+		info->btf_id = btf_xdp_get_netdev_id[0];
+		info->btf = bpf_get_btf_vmlinux();
+	        if (IS_ERR_OR_NULL(info->btf))
+			return false;
+		if (size != sizeof(u64))
+			return false;
+		return true;
 	}
 
 	return __is_valid_xdp_access(off, size);
@@ -9808,6 +9819,14 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct net_device, ifindex));
 		break;
+	case offsetof(struct xdp_md, rx_dev):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, rxq));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct xdp_rxq_info, dev));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 94659f6b3395..50403eb3b6cf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6123,6 +6123,7 @@ struct xdp_md {
 	__u32 rx_queue_index;  /* rxq->queue_index  */
 
 	__u32 egress_ifindex;  /* txq->dev->ifindex */
+	__bpf_md_ptr(struct net_device *, rx_dev); /* rxq->dev */
 };
 
 /* DEVMAP map-value layout
-- 
2.33.0




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux