[RFC iproute2-next 2/2] Introduce an eBPF hookpoint for tx queue selection in the XPS (Transmit Packet Steering) code.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



WORK IN PROGRESS:
  * bpf program loading works!
  * txq steering via bpf program return code works!
  * bpf program unloading not working.
  * bpf program attached query not working.
---
 include/bpf_api.h            |  5 +++
 include/uapi/linux/if_link.h | 12 ++++++
 ip/Makefile                  |  2 +-
 ip/ip_common.h               |  4 ++
 ip/iplink.c                  |  7 ++++
 ip/iplink_xps.c              | 88 ++++++++++++++++++++++++++++++++++++++++++++
 lib/bpf.c                    |  6 +++
 7 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 ip/iplink_xps.c

diff --git a/include/bpf_api.h b/include/bpf_api.h
index 89d3488..d1a2d90 100644
--- a/include/bpf_api.h
+++ b/include/bpf_api.h
@@ -78,6 +78,11 @@
 	__section(ELF_SECTION_PROG)
 #endif
 
+#ifndef __section_xps_entry
+# define __section_xps_entry						\
+	__section(ELF_SECTION_PROG)
+#endif
+
 #ifndef __section_cls_entry
 # define __section_cls_entry						\
 	__section(ELF_SECTION_CLASSIFIER)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d36919f..9efd686 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -167,6 +167,7 @@ enum {
 	IFLA_NEW_IFINDEX,
 	IFLA_MIN_MTU,
 	IFLA_MAX_MTU,
+	IFLA_XPS,
 	__IFLA_MAX
 };
 
@@ -977,6 +978,17 @@ enum {
 
 #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
 
+/* XPS section */
+
+enum {
+	IFLA_XPS_UNSPEC,
+	IFLA_XPS_FD,
+	IFLA_XPS_ATTACHED,
+	__IFLA_XPS_MAX,
+};
+
+#define IFLA_XPS_MAX (__IFLA_XPS_MAX - 1)
+
 enum {
 	IFLA_EVENT_NONE,
 	IFLA_EVENT_REBOOT,		/* internal reset / reboot */
diff --git a/ip/Makefile b/ip/Makefile
index 5ab78d7..9ad1c53 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -5,7 +5,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
     ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o iplink_dummy.o \
     iplink_ifb.o iplink_nlmon.o iplink_team.o iplink_vcan.o iplink_vxcan.o \
     iplink_vlan.o link_veth.o link_gre.o iplink_can.o iplink_xdp.o \
-    iplink_macvlan.o ipl2tp.o link_vti.o link_vti6.o link_xfrm.o \
+    iplink_macvlan.o ipl2tp.o link_vti.o link_vti6.o link_xfrm.o iplink_xps.o \
     iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
     link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
     iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
diff --git a/ip/ip_common.h b/ip/ip_common.h
index cd916ec..805d7d2 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -145,6 +145,10 @@ int xdp_parse(int *argc, char ***argv, struct iplink_req *req, const char *ifnam
 	      bool generic, bool drv, bool offload);
 void xdp_dump(FILE *fp, struct rtattr *tb, bool link, bool details);
 
+/* iplink_xps.c */
+int xps_parse(int *argc, char ***argv, struct iplink_req *req);
+void xps_dump(FILE *fp, struct rtattr *tb);
+
 /* iplink_vrf.c */
 __u32 ipvrf_get_table(const char *name);
 int name_is_vrf(const char *name);
diff --git a/ip/iplink.c b/ip/iplink.c
index 212a088..4d6d557 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -101,6 +101,9 @@ void iplink_usage(void)
 		"			[ { xdp | xdpgeneric | xdpdrv | xdpoffload } { off |\n"
 		"				  object FILE [ section NAME ] [ verbose ] |\n"
 		"				  pinned FILE } ]\n"
+		"			[ xps { off |\n"
+		"				  object FILE [ section NAME ] [ verbose ] |\n"
+		"				  pinned FILE } ]\n"
 		"			[ master DEVICE ][ vrf NAME ]\n"
 		"			[ nomaster ]\n"
 		"			[ addrgenmode { eui64 | none | stable_secret | random } ]\n"
@@ -668,6 +671,10 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
 
 			if (offload && name == dev)
 				dev = NULL;
+		} else if (strcmp(*argv, "xps") == 0) {
+			NEXT_ARG();
+			if (xps_parse(&argc, &argv, req))
+				exit(-1);
 		} else if (strcmp(*argv, "netns") == 0) {
 			NEXT_ARG();
 			if (netns != -1)
diff --git a/ip/iplink_xps.c b/ip/iplink_xps.c
new file mode 100644
index 0000000..7e94164
--- /dev/null
+++ b/ip/iplink_xps.c
@@ -0,0 +1,88 @@
+/*
+ * iplink_xps.c XPS program loader
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Authors:     Matthew Cover <matthew.cover@xxxxxxxxxxxxx>
+ *
+ *              Based on iplink_xdp.c by Daniel Borkmann <daniel@xxxxxxxxxxxxx>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <linux/bpf.h>
+
+#include "bpf_util.h"
+#include "ip_common.h"
+
+extern int force;
+
+struct xps_req {
+	struct iplink_req *req;
+	__u32 flags;
+};
+
+static void xps_ebpf_cb(void *raw, int fd, const char *annotation)
+{
+        struct xps_req *xps = raw;
+        struct iplink_req *req = xps->req;
+        struct rtattr *xps_attr;
+
+        xps_attr = addattr_nest(&req->n, sizeof(*req), IFLA_XPS);
+        addattr32(&req->n, sizeof(*req), IFLA_XPS_FD, fd);
+        addattr_nest_end(&req->n, xps_attr);
+}
+
+static const struct bpf_cfg_ops bpf_cb_ops = {
+	.ebpf_cb = xps_ebpf_cb,
+};
+
+static int xps_delete(struct iplink_req *req)
+{
+	xps_ebpf_cb(req, -1, NULL);
+	return 0;
+}
+
+int xps_parse(int *argc, char ***argv, struct iplink_req *req)
+{
+
+	struct bpf_cfg_in cfg = {
+		.type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.argc = *argc,
+		.argv = *argv,
+	};
+
+	struct xps_req xps = {
+		.req = req,
+	};
+
+	if (*argc == 1) {
+		if (strcmp(**argv, "none") == 0 ||
+		    strcmp(**argv, "off") == 0)
+			return xps_delete(req);
+	}
+
+	if (bpf_parse_and_load_common(&cfg, &bpf_cb_ops, &xps))
+		return -1;
+
+	*argc = cfg.argc;
+	*argv = cfg.argv;
+	return 0;
+}
+
+void xps_dump(FILE *fp, struct rtattr *xps)
+{
+	struct rtattr *tb[IFLA_XPS_MAX + 1];
+
+	parse_rtattr_nested(tb, IFLA_XPS_MAX, xps);
+	if (!tb[IFLA_XPS_ATTACHED] ||
+	    !rta_getattr_u8(tb[IFLA_XPS_ATTACHED]))
+		return;
+
+	fprintf(fp, "xps ");
+	/* More to come here in future for 'ip -d link' (digest, etc) ... */
+}
diff --git a/lib/bpf.c b/lib/bpf.c
index 7d2a322..e883afb 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -60,6 +60,7 @@ static const enum bpf_prog_type __bpf_types[] = {
 	BPF_PROG_TYPE_LWT_IN,
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
+	BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
 static const struct bpf_prog_meta __bpf_prog_meta[] = {
@@ -100,6 +101,11 @@ static const struct bpf_prog_meta __bpf_prog_meta[] = {
 		.subdir		= "ip",
 		.section	= ELF_SECTION_PROG,
 	},
+	[BPF_PROG_TYPE_SOCKET_FILTER] = {
+		.type		= "xps",
+		.subdir		= "xps",
+		.section	= ELF_SECTION_PROG,
+	},
 };
 
 static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
-- 
1.8.3.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux