Re: ipset support in tc

Linux Advanced Routing and Traffic Control

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> That would be nice, thanks.
> If it doesn't build on 3.3 let me know and I'll give it a shot myself.
OK, I compiled it all and (very) briefly tested it (a simple subnet test applied on src). In short - it works! 

I had to make a few adjustments though - the kernel part compiled without any problems (there were a few offsets when applying the kernel patch, but that was to be expected), but I've got the following errors during compilation of iproute2 (I used version 3.3):

em_ipset.c:34: error: nested redefinition of 'enum ip_set_dim'
em_ipset.c:34: error: redeclaration of 'enum ip_set_dim'
em_ipset.c:35: error: redeclaration of enumerator 'IPSET_DIM_ZERO'
/usr/include/linux/netfilter/ipset/ip_set.h:185: note: previous definition of 'IPSET_DIM_ZERO' was here
em_ipset.c:36: error: redeclaration of enumerator 'IPSET_DIM_ONE'
/usr/include/linux/netfilter/ipset/ip_set.h:186: note: previous definition of 'IPSET_DIM_ONE' was here
em_ipset.c:37: error: redeclaration of enumerator 'IPSET_DIM_TWO'
/usr/include/linux/netfilter/ipset/ip_set.h:187: note: previous definition of 'IPSET_DIM_TWO' was here
em_ipset.c:38: error: redeclaration of enumerator 'IPSET_DIM_THREE'
/usr/include/linux/netfilter/ipset/ip_set.h:188: note: previous definition of 'IPSET_DIM_THREE' was here
em_ipset.c:42: error: redeclaration of enumerator 'IPSET_DIM_MAX'
/usr/include/linux/netfilter/ipset/ip_set.h:192: note: previous definition of 'IPSET_DIM_MAX' was here
em_ipset.c:46: error: redefinition of 'union ip_set_name_index'
em_ipset.c:52: error: redefinition of 'struct ip_set_req_get_set'
em_ipset.c:62: error: redefinition of 'struct ip_set_req_version'
make[1]: *** [em_ipset.o] Error 1
make[1]: *** Waiting for unfinished jobs....


I've amended your 2 patches and I am attaching a revised version, though be aware that I am using ipset v6.11. I don't know whether these definitions should be present in em_ipset.c for the older version of ipset - they are certainly defined in ip_set.h for my own version of ipset. So, after applying this revised patch (attached) all is well:

[root@test1 ~]# ipset l nas
Name: nas
Type: hash:net
Header: family inet hashsize 1024 maxelem 65536 
Size in memory: 16792
References: 5
Members:
10.1.1.17

[root@test1 ~]# tc filter add dev ifb0 protocol ip parent f:0 prio 10\
basic match ipset'(nas src)'\
flowid f:22

After doing a simple file transfer, I get this:

class hfsc f:22 parent f:1 leaf 5: sc m1 0bit d 0us m2 98000Kbit ul m1 0bit d 0us m2 100000Kbit 
 Sent *57592449* bytes *40757* pkt (dropped 0, overlimits 0 requeues 0) 
 backlog 0b 0p requeues 0 
 period 40474 work 57592449 bytes rtwork 41419592 bytes level 0 


So, clearly there is a match and it seems to work as far as I can see. In the coming days I will test it with more complex sets, as well as applying multiple set statements (using 'and' and 'or'). If that works, I'll check the performance to see how it fares compared to the "standard" tc with hard-coded values.
--- a/etc/iproute2/ematch_map	2012-03-20 00:27:12.000000000 +0000
+++ b/etc/iproute2/ematch_map	2012-06-11 01:22:23.005423418 +0100
@@ -3,3 +3,6 @@
 2	nbyte
 3	u32
 4	meta
+5	text
+6	vlan
+7	ipset
--- a/include/linux/pkt_cls.h	2012-03-20 00:27:12.000000000 +0000
+++ b/include/linux/pkt_cls.h	2012-06-11 01:22:23.002423418 +0100
@@ -452,7 +452,8 @@
 #define	TCF_EM_META		4
 #define	TCF_EM_TEXT		5
 #define        TCF_EM_VLAN		6
-#define	TCF_EM_MAX		6
+#define	TCF_EM_IPSET		7
+#define	TCF_EM_MAX		7
 
 enum {
 	TCF_EM_PROG_TC
--- a/tc/Makefile	2012-03-20 00:27:12.000000000 +0000
+++ b/tc/Makefile	2012-06-11 01:22:23.002423418 +0100
@@ -46,6 +46,7 @@
 TCMODULES += em_cmp.o
 TCMODULES += em_u32.o
 TCMODULES += em_meta.o
+TCMODULES += em_ipset.o
 TCMODULES += q_mqprio.o
 
 TCSO :=
--- a/tc/em_ipset.c	1970-01-01 01:00:00.000000000 +0100
+++ b/tc/em_ipset.c	2012-06-11 01:25:14.379425306 +0100
@@ -0,0 +1,242 @@
+/*
+ * em_ipset.c		IPset Ematch
+ *
+ * (C) 2012 Florian Westphal <fw@xxxxxxxxx>
+ *
+ * Parts taken from iptables libxt_set.h:
+ * Copyright (C) 2000-2002 Joakim Axelsson <gozem@xxxxxxxx>
+ *                         Patrick Schaaf <bof@xxxxxx>
+ *                         Martin Josefsson <gandalf@xxxxxxxxxxxxxx>
+ * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@xxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <errno.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+#include <xtables.h>
+#include <linux/netfilter/ipset/ip_set.h>
+
+#include "m_ematch.h"
+
+#define SO_IP_SET		83
+#define IPSET_INVALID_ID	65535
+#define IP_SET_OP_GET_BYNAME	0x00000006	/* Get set index by name */
+#define IP_SET_OP_GET_BYINDEX	0x00000007	/* Get set name by index */
+
+/* Uses ip_set_req_get_set */
+#define IP_SET_OP_VERSION	0x00000100	/* Ask kernel version */
+
+/* xt_ipset_info; duplicated because "ip_set_id_t" isn't typedef'd in US */
+struct em_ipset_info {
+	__u16 index; /* ip_set_id_t */
+	__u8 dim;
+	__u8 flags;
+};
+
+extern struct ematch_util ipset_ematch_util;
+
+static int get_version(unsigned *version)
+{
+	int res, sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+	struct ip_set_req_version req_version;
+	socklen_t size = sizeof(req_version);
+
+	if (sockfd < 0) {
+		fputs("Can't open socket to ipset.\n", stderr);
+		return -1;
+	}
+
+	req_version.op = IP_SET_OP_VERSION;
+	res = getsockopt(sockfd, SOL_IP, SO_IP_SET, &req_version, &size);
+	if (res != 0) {
+		perror("xt_set getsockopt");
+		return -1;
+	}
+
+	*version = req_version.version;
+	return sockfd;
+}
+
+static int do_getsockopt(struct ip_set_req_get_set *req)
+{
+	int sockfd, res;
+	socklen_t size = sizeof(struct ip_set_req_get_set);
+	sockfd = get_version(&req->version);
+	if (sockfd < 0)
+		return -1;
+	res = getsockopt(sockfd, SOL_IP, SO_IP_SET, req, &size);
+	if (res != 0)
+		perror("Problem when communicating with ipset");
+	close(sockfd);
+	if (res != 0)
+		return -1;
+
+	if (size != sizeof(struct ip_set_req_get_set)) {
+		fprintf(stderr,
+			"Incorrect return size from kernel during ipset lookup, "
+			"(want %zu, got %zu)\n",
+			sizeof(struct ip_set_req_get_set), (size_t)size);
+		return -1;
+	}
+
+	return res;
+}
+
+static int
+get_set_byid(char *setname, unsigned int idx)
+{
+	struct ip_set_req_get_set req;
+	int res;
+
+	req.op = IP_SET_OP_GET_BYINDEX;
+	req.set.index = idx;
+	res = do_getsockopt(&req);
+	if (res != 0)
+		return -1;
+	if (req.set.name[0] == '\0') {
+		fprintf(stderr,
+			"Set with index %i in kernel doesn't exist.\n", idx);
+		return -1;
+	}
+
+	strncpy(setname, req.set.name, IPSET_MAXNAMELEN);
+	return 0;
+}
+
+static int
+get_set_byname(const char *setname, struct em_ipset_info *info)
+{
+	struct ip_set_req_get_set req;
+	int res;
+
+	req.op = IP_SET_OP_GET_BYNAME;
+	strncpy(req.set.name, setname, IPSET_MAXNAMELEN);
+	req.set.name[IPSET_MAXNAMELEN - 1] = '\0';
+	res = do_getsockopt(&req);
+	if (res != 0)
+		return -1;
+	if (req.set.index == IPSET_INVALID_ID)
+		return -1;
+	info->index = req.set.index;
+	return 0;
+}
+
+static int
+parse_dirs(const char *opt_arg, struct em_ipset_info *info)
+{
+        char *saved = strdup(opt_arg);
+        char *ptr, *tmp = saved;
+
+	if (!tmp) {
+		perror("strdup");
+		return -1;
+	}
+
+        while (info->dim < IPSET_DIM_MAX && tmp != NULL) {
+                info->dim++;
+                ptr = strsep(&tmp, ",");
+                if (strncmp(ptr, "src", 3) == 0)
+                        info->flags |= (1 << info->dim);
+                else if (strncmp(ptr, "dst", 3) != 0) {
+                        fputs("You must spefify (the comma separated list of) 'src' or 'dst'\n", stderr);
+			free(saved);
+			return -1;
+		}
+        }
+
+        if (tmp)
+                fprintf(stderr, "Can't be more src/dst options than %u", IPSET_DIM_MAX);
+        free(saved);
+	return tmp ? -1 : 0;
+}
+
+static void ipset_print_usage(FILE *fd)
+{
+	fprintf(fd,
+	    "Usage: ipset(SETNAME FLAGS)\n" \
+	    "where: SETNAME:= string\n" \
+	    "       FLAGS  := { FLAG[,FLAGS] }\n" \
+	    "       FLAG   := { src | dst }\n" \
+	    "\n" \
+	    "Example: ipset(bulk src,dst)\n");
+}
+
+static int ipset_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr,
+			    struct bstr *args)
+{
+	struct bstr *a;
+	struct em_ipset_info set_info;
+	int ret;
+
+	memset(&set_info, 0, sizeof(set_info));
+
+#define PARSE_ERR(CARG, FMT, ARGS...) \
+	em_parse_error(EINVAL, args, CARG, &ipset_ematch_util, FMT ,##ARGS)
+
+	if (args == NULL)
+		return PARSE_ERR(a, "ipset: missing set name");
+
+	if (args->len >= IPSET_MAXNAMELEN)
+		return PARSE_ERR(a, "ipset: set name too long (max %u)", IPSET_MAXNAMELEN - 1);
+	ret = get_set_byname(args->data, &set_info);
+	if (ret < 0)
+		return PARSE_ERR(args, "ipset: unknown set name '%s'", args->data);
+
+	if (args->next == NULL)
+		return PARSE_ERR(args, "ipset: missing set flags");
+
+	args = bstr_next(args);
+	if (parse_dirs(args->data, &set_info))
+		return PARSE_ERR(args, "ipset: error parsing set flags");
+
+	if (args->next) {
+		args = bstr_next(args);
+		return PARSE_ERR(args, "ipset: unknown parameter");
+	}
+
+	addraw_l(n, MAX_MSG, hdr, sizeof(*hdr));
+	addraw_l(n, MAX_MSG, &set_info, sizeof(set_info));
+
+#undef PARSE_ERR
+	return 0;
+}
+
+static int ipset_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data,
+			    int data_len)
+{
+	int i;
+        char setname[IPSET_MAXNAMELEN];
+	const struct em_ipset_info *set_info = data;
+
+	if (data_len != sizeof(*set_info)) {
+		fprintf(stderr, "xt_ipset_info struct size mismatch\n");
+		return -1;
+	}
+
+        if (get_set_byid(setname, set_info->index))
+		return -1;
+	fputs(setname, fd);
+	for (i = 1; i <= set_info->dim; i++) {
+		fprintf(fd, "%s%s", i == 1 ? " " : ",", set_info->flags & (1 << i) ? "src" : "dst");
+	}
+
+	return 0;
+}
+
+struct ematch_util ipset_ematch_util = {
+	.kind = "ipset",
+	.kind_num = TCF_EM_IPSET,
+	.parse_eopt = ipset_parse_eopt,
+	.print_eopt = ipset_print_eopt,
+	.print_usage = ipset_print_usage
+};

[Index of Archives]     [LARTC Home Page]     [Netfilter]     [Netfilter Development]     [Network Development]     [Bugtraq]     [GCC Help]     [Yosemite News]     [Linux Kernel]     [Fedora Users]
  Powered by Linux