[RFC net-next 1/2] Introduce an eBPF hookpoint for tx queue selection in the XPS (Transmit Packet Steering) code.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



WORK IN PROGRESS:
  * bpf program loading works!
  * txq steering via bpf program return code works!
  * bpf program unloading not working.
  * bpf program attached query not working.
---
 include/linux/netdevice.h    |  3 +++
 include/uapi/linux/if_link.h | 12 +++++++++
 net/core/dev.c               | 61 ++++++++++++++++++++++++++++++++++++-------
 net/core/rtnetlink.c         | 62 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 9 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9eda1c3..88e37d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1966,6 +1966,7 @@ struct net_device {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps __rcu *xps_cpus_map;
 	struct xps_dev_maps __rcu *xps_rxqs_map;
+	struct bpf_prog __rcu     *xps_prog;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -2147,6 +2148,8 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
 					 struct sk_buff *skb,
 					 struct net_device *sb_dev);
 
+int dev_change_xps_fd(struct net_device *dev, int fd);
+
 /* returns the headroom that the master device needs to take in account
  * when forwarding to this dev
  */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4a8c02c..a23d241 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -167,6 +167,7 @@ enum {
 	IFLA_NEW_IFINDEX,
 	IFLA_MIN_MTU,
 	IFLA_MAX_MTU,
+	IFLA_XPS,
 	__IFLA_MAX
 };
 
@@ -979,6 +980,17 @@ enum {
 
 #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
 
+/* XPS section */
+
+enum {
+	IFLA_XPS_UNSPEC,
+	IFLA_XPS_FD,
+	IFLA_XPS_ATTACHED,
+	__IFLA_XPS_MAX,
+};
+
+#define IFLA_XPS_MAX (__IFLA_XPS_MAX - 1)
+
 enum {
 	IFLA_EVENT_NONE,
 	IFLA_EVENT_REBOOT,		/* internal reset / reboot */
diff --git a/net/core/dev.c b/net/core/dev.c
index 71b18e8..a46d42b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3663,26 +3663,34 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
+	struct bpf_prog *prog;
 	struct sock *sk = skb->sk;
+	int bpf_ret = -1;
 	int queue_index = -1;
 
 	if (!static_key_false(&xps_needed))
 		return -1;
 
 	rcu_read_lock();
-	if (!static_key_false(&xps_rxqs_needed))
-		goto get_cpus_map;
 
-	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
-	if (dev_maps) {
-		int tci = sk_rx_queue_get(sk);
+	prog = rcu_dereference(dev->xps_prog);
+	if (prog) {
+		bpf_ret = bpf_prog_run_clear_cb(prog, skb);
+		if (bpf_ret >= 0)
+			queue_index = bpf_ret % dev->num_tx_queues;
+	}
 
-		if (tci >= 0 && tci < dev->num_rx_queues)
-			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
-							  tci);
+	if (queue_index < 0 && static_key_false(&xps_rxqs_needed)) {
+		dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
+		if (dev_maps) {
+			int tci = sk_rx_queue_get(sk);
+
+			if (tci >= 0 && tci < dev->num_rx_queues)
+				queue_index = __get_xps_queue_idx(dev, skb,
+								dev_maps, tci);
+		}
 	}
 
-get_cpus_map:
 	if (queue_index < 0) {
 		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
 		if (dev_maps) {
@@ -8170,6 +8178,41 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 	return err;
 }
 
+static void dev_xps_install(struct net_device *dev, struct bpf_prog *prog)
+{
+#ifdef CONFIG_XPS
+	struct bpf_prog *old = rtnl_dereference(dev->xps_prog);
+	struct bpf_prog *new = prog;
+
+	rcu_assign_pointer(dev->xps_prog, new);
+	if (old)
+		bpf_prog_put(old);
+#endif
+}
+
+/**
+ *	dev_change_xps_fd - set or clear a bpf program for tx queue selection for a device
+ *	@dev: device
+ *	@fd: new program fd or negative value to clear
+ *
+ *	Set or clear a bpf program for a device
+ */
+int dev_change_xps_fd(struct net_device *dev, int fd)
+{
+	struct bpf_prog *prog = NULL;
+
+	ASSERT_RTNL();
+
+	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	dev_xps_install(dev, prog);
+
+	return 0;
+}
+
 /**
  *	dev_new_index	-	allocate an ifindex
  *	@net: the applicable net namespace
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1ee6460..202b59a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -980,6 +980,15 @@ static size_t rtnl_xdp_size(void)
 	return xdp_size;
 }
 
+static size_t rtnl_xps_size(void)
+{
+	size_t xps_size = nla_total_size(0) +	/* nest IFLA_XPS */
+			  nla_total_size(1) +	/* XPS_ATTACHED */
+			  nla_total_size(4);	/* XPS_PROG_ID */
+
+	return xps_size;
+}
+
 static noinline size_t if_nlmsg_size(const struct net_device *dev,
 				     u32 ext_filter_mask)
 {
@@ -1018,6 +1027,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + rtnl_xdp_size() /* IFLA_XDP */
+	       + rtnl_xps_size() /* IFLA_XPS */
 	       + nla_total_size(4)  /* IFLA_EVENT */
 	       + nla_total_size(4)  /* IFLA_NEW_NETNSID */
 	       + nla_total_size(4)  /* IFLA_NEW_IFINDEX */
@@ -1455,6 +1465,31 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 	return err;
 }
 
+static int rtnl_xps_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *xps;
+	struct bpf_prog *xps_prog;
+	int err;
+
+	ASSERT_RTNL();
+
+	xps = nla_nest_start(skb, IFLA_XPS);
+	if (!xps)
+		return -EMSGSIZE;
+
+	xps_prog = rtnl_dereference(dev->xps_prog);
+	if (xps_prog) {
+		err = nla_put_u8(skb, IFLA_XPS_ATTACHED, xps_prog->aux->id);
+		if (err) {
+			nla_nest_cancel(skb, xps);
+			return err;
+		}
+	}
+
+	nla_nest_end(skb, xps);
+	return 0;
+}
+
 static u32 rtnl_get_event(unsigned long event)
 {
 	u32 rtnl_event_type = IFLA_EVENT_NONE;
@@ -1697,6 +1732,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 		goto nla_put_failure_rcu;
 	rcu_read_unlock();
 
+	if (rtnl_xps_fill(skb, dev))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -1750,6 +1788,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
 	[IFLA_MIN_MTU]		= { .type = NLA_U32 },
 	[IFLA_MAX_MTU]		= { .type = NLA_U32 },
+	[IFLA_XPS]		= { .type = NLA_NESTED },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1801,6 +1840,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	[IFLA_XDP_PROG_ID]	= { .type = NLA_U32 },
 };
 
+static const struct nla_policy ifla_xps_policy[IFLA_XPS_MAX + 1] = {
+	[IFLA_XPS_FD]		= { .type = NLA_S32 },
+	[IFLA_XPS_ATTACHED]	= { .type = NLA_U8 },
+};
+
 static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
 {
 	const struct rtnl_link_ops *ops = NULL;
@@ -2709,6 +2753,24 @@ static int do_setlink(const struct sk_buff *skb,
 		}
 	}
 
+	if (tb[IFLA_XPS]) {
+		struct nlattr  *xps[IFLA_XPS_MAX + 1];
+
+		err = nla_parse_nested_deprecated(xps, IFLA_XPS_MAX,
+						  tb[IFLA_XPS],
+						  ifla_xps_policy, NULL);
+		if (err < 0)
+			goto errout;
+
+		if (xps[IFLA_XPS_FD]) {
+			err = dev_change_xps_fd(dev,
+						nla_get_s32(xps[IFLA_XPS_FD]));
+			if (err)
+				goto errout;
+			status |= DO_SETLINK_NOTIFY;
+		}
+	}
+
 errout:
 	if (status & DO_SETLINK_MODIFIED) {
 		if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
-- 
1.8.3.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux