[PATCH bpf-next] xsk: support AF_PACKET

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In xsk mode, users cannot use AF_PACKET (e.g. tcpdump) to observe the
current rx/tx data packets. This ability is very important in many cases,
so this patch allows AF_PACKET to obtain xsk packets.

By default, AF_PACKET obtains data packets through the ptype_base/ptype_all
handlers in dev.c. Those callbacks are not suitable for xsk, because
calling them may also send the packet to other protocol stacks. So the
method used here is to let AF_PACKET get the data packets from xsk
separately, through a hook list of its own.

Signed-off-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx>
---
 include/net/xdp_sock.h |  15 +++++
 net/packet/af_packet.c |  35 +++++++++--
 net/packet/internal.h  |   7 +++
 net/xdp/Makefile       |   2 +-
 net/xdp/xsk.c          |   9 +++
 net/xdp/xsk_packet.c   | 129 +++++++++++++++++++++++++++++++++++++++++
 net/xdp/xsk_packet.h   |  44 ++++++++++++++
 7 files changed, 234 insertions(+), 7 deletions(-)
 create mode 100644 net/xdp/xsk_packet.c
 create mode 100644 net/xdp/xsk_packet.h

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 9c0722c6d7ac..b0acf0293132 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -17,6 +17,11 @@ struct net_device;
 struct xsk_queue;
 struct xdp_buff;
 
+struct xsk_packet {
+	struct list_head list;		/* link in the xsk_pt list */
+	struct packet_type *pt;		/* hook whose ->func gets the skb */
+};
+
 struct xdp_umem {
 	void *addrs;
 	u64 size;
@@ -79,6 +84,8 @@ struct xdp_sock {
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
+void xsk_add_pack(struct xsk_packet *xpt);
+void __xsk_remove_pack(struct xsk_packet *xpt);
 
 #else
 
@@ -96,6 +103,14 @@ static inline void __xsk_map_flush(void)
 {
 }
 
+/* !CONFIG_XDP_SOCKETS stubs; static inline so each includer links cleanly. */
+static inline void xsk_add_pack(struct xsk_packet *xpt)
+{
+}
+static inline void __xsk_remove_pack(struct xsk_packet *xpt)
+{
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 597d798ac0a5..2720b51d13a6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -303,10 +303,14 @@ static void __register_prot_hook(struct sock *sk)
 	struct packet_sock *po = pkt_sk(sk);
 
 	if (!po->running) {
-		if (po->fanout)
+		if (po->fanout) {
 			__fanout_link(sk, po);
-		else
+		} else {
 			dev_add_pack(&po->prot_hook);
+#ifdef CONFIG_XDP_SOCKETS
+			xsk_add_pack(&po->xsk_pt);
+#endif
+		}
 
 		sock_hold(sk);
 		po->running = 1;
@@ -333,10 +337,14 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
 
 	po->running = 0;
 
-	if (po->fanout)
+	if (po->fanout) {
 		__fanout_unlink(sk, po);
-	else
+	} else {
 		__dev_remove_pack(&po->prot_hook);
+#ifdef CONFIG_XDP_SOCKETS
+		__xsk_remove_pack(&po->xsk_pt);
+#endif
+	}
 
 	__sock_put(sk);
 
@@ -1483,8 +1491,12 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po)
 	rcu_assign_pointer(f->arr[f->num_members], sk);
 	smp_wmb();
 	f->num_members++;
-	if (f->num_members == 1)
+	if (f->num_members == 1) {
 		dev_add_pack(&f->prot_hook);
+#ifdef CONFIG_XDP_SOCKETS
+		xsk_add_pack(&f->xsk_pt);
+#endif
+	}
 	spin_unlock(&f->lock);
 }
 
@@ -1504,8 +1516,12 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
 			   rcu_dereference_protected(f->arr[f->num_members - 1],
 						     lockdep_is_held(&f->lock)));
 	f->num_members--;
-	if (f->num_members == 0)
+	if (f->num_members == 0) {
 		__dev_remove_pack(&f->prot_hook);
+#ifdef CONFIG_XDP_SOCKETS
+		__xsk_remove_pack(&f->xsk_pt);
+#endif
+	}
 	spin_unlock(&f->lock);
 }
 
@@ -1737,6 +1753,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
 		match->prot_hook.af_packet_priv = match;
 		match->prot_hook.id_match = match_fanout_group;
 		match->max_num_members = args->max_num_members;
+#ifdef CONFIG_XDP_SOCKETS
+		match->xsk_pt.pt = &match->prot_hook;
+#endif
+
 		list_add(&match->list, &fanout_list);
 	}
 	err = -EINVAL;
@@ -3315,6 +3335,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 		po->prot_hook.func = packet_rcv_spkt;
 
 	po->prot_hook.af_packet_priv = sk;
+#ifdef CONFIG_XDP_SOCKETS
+	po->xsk_pt.pt = &po->prot_hook;
+#endif
 
 	if (proto) {
 		po->prot_hook.type = proto;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 48af35b1aed2..d224b926588a 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -3,6 +3,7 @@
 #define __PACKET_INTERNAL_H__
 
 #include <linux/refcount.h>
+#include <net/xdp_sock.h>
 
 struct packet_mclist {
 	struct packet_mclist	*next;
@@ -94,6 +95,9 @@ struct packet_fanout {
 	spinlock_t		lock;
 	refcount_t		sk_ref;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xsk_packet	xsk_pt;
+#endif
 	struct sock	__rcu	*arr[];
 };
 
@@ -136,6 +140,9 @@ struct packet_sock {
 	struct net_device __rcu	*cached_dev;
 	int			(*xmit)(struct sk_buff *skb);
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xsk_packet	xsk_pt;
+#endif
 	atomic_t		tp_drops ____cacheline_aligned_in_smp;
 };
 
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 30cdc4315f42..bcac0591879b 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o xskmap.o
+obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o xskmap.o xsk_packet.o
 obj-$(CONFIG_XDP_SOCKETS) += xsk_buff_pool.o
 obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cd62d4ba87a9..fc97e7f9e4cb 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -28,6 +28,7 @@
 
 #include "xsk_queue.h"
 #include "xdp_umem.h"
+#include "xsk_packet.h"
 #include "xsk.h"
 
 #define TX_BATCH_SIZE 32
@@ -156,6 +157,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	int err;
 
 	addr = xp_get_handle(xskb);
+	xsk_rx_packet_deliver(xs, addr, len);
 	err = xskq_prod_reserve_desc(xs->rx, addr, len);
 	if (err) {
 		xs->rx_queue_full++;
@@ -347,6 +349,8 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
 		if (xskq_prod_reserve_addr(pool->cq, desc->addr))
 			goto out;
 
+		xsk_tx_zc_packet_deliver(xs, desc);
+
 		xskq_cons_release(xs->tx);
 		rcu_read_unlock();
 		return true;
@@ -576,6 +580,8 @@ static int xsk_generic_xmit(struct sock *sk)
 		}
 		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 
+		xsk_tx_packet_deliver(xs, &desc, skb);
+
 		err = __dev_direct_xmit(skb, xs->queue_id);
 		if  (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
@@ -1467,6 +1473,9 @@ static int __init xsk_init(void)
 
 	for_each_possible_cpu(cpu)
 		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
+
+	INIT_LIST_HEAD(&xsk_pt);
+
 	return 0;
 
 out_pernet:
diff --git a/net/xdp/xsk_packet.c b/net/xdp/xsk_packet.c
new file mode 100644
index 000000000000..41005f214d6d
--- /dev/null
+++ b/net/xdp/xsk_packet.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets packet api
+ *
+ * Author: Xuan Zhuo <xuanzhuo.dxf@xxxxxxxxxxxxxxxxx>
+ */
+
+#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
+#include "xsk.h"
+#include "xsk_packet.h"
+
+struct list_head xsk_pt __read_mostly;	/* hooks walked under RCU */
+static DEFINE_SPINLOCK(pt_lock);	/* serializes updates to xsk_pt */
+
+static struct sk_buff *xsk_pt_alloc_skb(struct xdp_sock *xs,
+					struct xdp_desc *desc)
+{
+	struct sk_buff *copy;
+	u32 len = desc->len;
+	void *data;
+
+	/* Fresh skb with room for the whole descriptor payload. */
+	copy = alloc_skb(len, GFP_ATOMIC);
+	if (!copy)
+		return NULL;
+	skb_put(copy, len);
+
+	/* Fill it from the pool buffer that desc->addr refers to. */
+	data = xsk_buff_raw_get_data(xs->pool, desc->addr);
+	if (unlikely(skb_store_bits(copy, 0, data, len))) {
+		kfree_skb(copy);
+		return NULL;
+	}
+
+	return copy;
+}
+
+static struct sk_buff *xsk_pt_get_skb(struct xdp_sock *xs,
+				      struct xdp_desc *desc,
+				      struct sk_buff *skb,
+				      bool rx)
+{
+	struct net_device *netdev = xs->dev;
+	struct sk_buff *nskb;
+
+	/* AF_PACKET may hold on to the skb for a long time, so it must not
+	 * sit on an xsk buffer: that would get in the way of releasing the
+	 * xsk and reusing the buffer. Hand out a separate skb instead.
+	 */
+	if (skb && !(netdev->priv_flags & IFF_TX_SKB_NO_LINEAR))
+		nskb = skb_clone(skb, GFP_ATOMIC);
+	else
+		nskb = xsk_pt_alloc_skb(xs, desc);
+	if (!nskb)
+		return NULL;
+
+	nskb->protocol = eth_type_trans(nskb, netdev);
+	skb_reset_network_header(nskb);
+	nskb->transport_header = nskb->network_header;
+	__net_timestamp(nskb);
+
+	if (!rx)
+		nskb->pkt_type = PACKET_OUTGOING;
+
+	return nskb;
+}
+
+/* Hand one xsk frame to each AF_PACKET hook on xsk_pt that accepts it. */
+void __xsk_pt_deliver(struct xdp_sock *xs, struct sk_buff *skb,
+		      struct xdp_desc *desc, bool rx)
+{
+	struct packet_type *pt_prev = NULL;
+	struct packet_type *ptype;
+	struct xsk_packet *xpt;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xpt, &xsk_pt, list) {
+		ptype = xpt->pt;
+
+		/* xsk_pt is global: skip hooks bound to another device. */
+		if (ptype->dev && ptype->dev != xs->dev)
+			continue;
+		if (!rx && ptype->ignore_outgoing)
+			continue;
+
+		if (pt_prev) {
+			refcount_inc(&skb->users);
+			pt_prev->func(skb, skb->dev, pt_prev, skb->dev);
+		} else {
+			skb = xsk_pt_get_skb(xs, desc, skb, rx);
+			if (unlikely(!skb))
+				goto out_unlock;
+		}
+		pt_prev = ptype;
+	}
+
+	if (pt_prev)
+		pt_prev->func(skb, skb->dev, pt_prev, skb->dev);
+out_unlock:
+	rcu_read_unlock();
+}
+
+void xsk_add_pack(struct xsk_packet *xpt)
+{
+	/* Only ETH_P_ALL hooks are put on the xsk_pt list. */
+	if (xpt->pt->type == htons(ETH_P_ALL)) {
+		spin_lock(&pt_lock);
+		list_add_rcu(&xpt->list, &xsk_pt);
+		spin_unlock(&pt_lock);
+	}
+}
+
+void __xsk_remove_pack(struct xsk_packet *xpt)
+{
+	struct xsk_packet *xpt1;
+
+	spin_lock(&pt_lock);
+	list_for_each_entry(xpt1, &xsk_pt, list) {
+		if (xpt1 == xpt) {
+			list_del_rcu(&xpt1->list);
+			goto out;
+		}
+	}
+	/* xsk_add_pack() never lists non-ETH_P_ALL hooks: no warning then. */
+	if (xpt->pt->type == htons(ETH_P_ALL))
+		pr_warn("xsk_remove_pack: %p not found\n", xpt);
+out:
+	spin_unlock(&pt_lock);
+}
diff --git a/net/xdp/xsk_packet.h b/net/xdp/xsk_packet.h
new file mode 100644
index 000000000000..55d30fa8828b
--- /dev/null
+++ b/net/xdp/xsk_packet.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __XSK_PACKET_H__
+#define __XSK_PACKET_H__
+extern struct list_head xsk_pt __read_mostly;
+
+void __xsk_pt_deliver(struct xdp_sock *xs, struct sk_buff *skb,
+		      struct xdp_desc *desc, bool rx);
+
+static inline void xsk_tx_packet_deliver(struct xdp_sock *xs,
+					 struct xdp_desc *desc,
+					 struct sk_buff *skb)
+{
+	/* Deliver only when xsk_pt has entries; BHs are off meanwhile. */
+	if (unlikely(!list_empty(&xsk_pt))) {
+		local_bh_disable();
+		__xsk_pt_deliver(xs, skb, desc, false);
+		local_bh_enable();
+	}
+}
+
+/* TX tap for the path that has no skb: only @desc describes the frame. */
+static inline void xsk_tx_zc_packet_deliver(struct xdp_sock *xs,
+					    struct xdp_desc *desc)
+{
+	/* The xsk_pt list is expected to be empty most of the time. */
+	if (unlikely(!list_empty(&xsk_pt)))
+		__xsk_pt_deliver(xs, NULL, desc, false);
+}
+
+static inline void xsk_rx_packet_deliver(struct xdp_sock *xs, u64 addr, u32 len)
+{
+	struct xdp_desc rx_desc;
+
+	if (unlikely(!list_empty(&xsk_pt))) {
+		/* __xsk_pt_deliver() takes a descriptor. */
+		rx_desc.addr = addr;
+		rx_desc.len = len;
+
+		__xsk_pt_deliver(xs, NULL, &rx_desc, true);
+	}
+}
+
+#endif /* __XSK_PACKET_H__ */
-- 
2.31.0




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux