> That would be nice, thanks.
> If it doesn't build on 3.3 let me know and I'll give it a shot myself.

OK, I compiled it all and (very) briefly tested it (a simple subnet test applied on src). In short - it works!

I had to make a few adjustments though - the kernel part compiled without any problems (there were a few offsets when applying the kernel patch, but that was to be expected), but I got the following errors during compilation of iproute2 (I used version 3.3):

em_ipset.c:34: error: nested redefinition of 'enum ip_set_dim'
em_ipset.c:34: error: redeclaration of 'enum ip_set_dim'
em_ipset.c:35: error: redeclaration of enumerator 'IPSET_DIM_ZERO'
/usr/include/linux/netfilter/ipset/ip_set.h:185: note: previous definition of 'IPSET_DIM_ZERO' was here
em_ipset.c:36: error: redeclaration of enumerator 'IPSET_DIM_ONE'
/usr/include/linux/netfilter/ipset/ip_set.h:186: note: previous definition of 'IPSET_DIM_ONE' was here
em_ipset.c:37: error: redeclaration of enumerator 'IPSET_DIM_TWO'
/usr/include/linux/netfilter/ipset/ip_set.h:187: note: previous definition of 'IPSET_DIM_TWO' was here
em_ipset.c:38: error: redeclaration of enumerator 'IPSET_DIM_THREE'
/usr/include/linux/netfilter/ipset/ip_set.h:188: note: previous definition of 'IPSET_DIM_THREE' was here
em_ipset.c:42: error: redeclaration of enumerator 'IPSET_DIM_MAX'
/usr/include/linux/netfilter/ipset/ip_set.h:192: note: previous definition of 'IPSET_DIM_MAX' was here
em_ipset.c:46: error: redefinition of 'union ip_set_name_index'
em_ipset.c:52: error: redefinition of 'struct ip_set_req_get_set'
em_ipset.c:62: error: redefinition of 'struct ip_set_req_version'
make[1]: *** [em_ipset.o] Error 1
make[1]: *** Waiting for unfinished jobs....

I've amended your 2 patches and I am attaching a revised version, though be aware that I am using ipset v6.11.
I don't know whether these definitions should be present in em_ipset.c for the older version of ipset - they are certainly defined in ip_set.h for my own version of ipset.

So, after applying this revised patch (attached) all is well:

[root@test1 ~]# ipset l nas
Name: nas
Type: hash:net
Header: family inet hashsize 1024 maxelem 65536
Size in memory: 16792
References: 5
Members:
10.1.1.17

[root@test1 ~]# tc filter add dev ifb0 protocol ip parent f:0 prio 10\
 basic match ipset'(nas src)'\
 flowid f:22

After doing a simple file transfer, I get this:

class hfsc f:22 parent f:1 leaf 5: sc m1 0bit d 0us m2 98000Kbit ul m1 0bit d 0us m2 100000Kbit
 Sent *57592449* bytes *40757* pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
 period 40474 work 57592449 bytes rtwork 41419592 bytes level 0

So, clearly there is a match and it seems to work as far as I can see. In the coming days I will test it with more complex sets, as well as applying multiple set statements (using 'and' and 'or'). If that works, I'll check the performance to see how it fares compared to the "standard" tc with hard-coded values.
--- a/etc/iproute2/ematch_map	2012-03-20 00:27:12.000000000 +0000
+++ b/etc/iproute2/ematch_map	2012-06-11 01:22:23.005423418 +0100
@@ -3,3 +3,6 @@
 2	nbyte
 3	u32
 4	meta
+5	text
+6	vlan
+7	ipset
--- a/include/linux/pkt_cls.h	2012-03-20 00:27:12.000000000 +0000
+++ b/include/linux/pkt_cls.h	2012-06-11 01:22:23.002423418 +0100
@@ -452,7 +452,8 @@
 #define	TCF_EM_META		4
 #define	TCF_EM_TEXT		5
 #define	TCF_EM_VLAN		6
-#define	TCF_EM_MAX		6
+#define	TCF_EM_IPSET		7
+#define	TCF_EM_MAX		7
 
 enum {
 	TCF_EM_PROG_TC
--- a/tc/Makefile	2012-03-20 00:27:12.000000000 +0000
+++ b/tc/Makefile	2012-06-11 01:22:23.002423418 +0100
@@ -46,6 +46,7 @@
 TCMODULES += em_cmp.o
 TCMODULES += em_u32.o
 TCMODULES += em_meta.o
+TCMODULES += em_ipset.o
 TCMODULES += q_mqprio.o
 
 TCSO :=
--- a/tc/em_ipset.c	1970-01-01 01:00:00.000000000 +0100
+++ b/tc/em_ipset.c	2012-06-11 01:25:14.379425306 +0100
@@ -0,0 +1,251 @@
+/*
+ * em_ipset.c		IPset Ematch
+ *
+ * (C) 2012 Florian Westphal <fw@xxxxxxxxx>
+ *
+ * Parts taken from iptables libxt_set.h:
+ * Copyright (C) 2000-2002 Joakim Axelsson <gozem@xxxxxxxx>
+ *                         Patrick Schaaf <bof@xxxxxx>
+ *                         Martin Josefsson <gandalf@xxxxxxxxxxxxxx>
+ * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@xxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <errno.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+#include <xtables.h>
+#include <linux/netfilter/ipset/ip_set.h>
+
+#include "m_ematch.h"
+
+#define SO_IP_SET		83
+#define IPSET_INVALID_ID	65535
+#define IP_SET_OP_GET_BYNAME	0x00000006	/* Get set index by name */
+#define IP_SET_OP_GET_BYINDEX	0x00000007	/* Get set name by index */
+
+/* Uses ip_set_req_get_set */
+#define IP_SET_OP_VERSION	0x00000100	/* Ask kernel version */
+
+/* xt_ipset_info; duplicated because "ip_set_id_t" isn't typedef'd in US */
+struct em_ipset_info {
+	__u16 index; /* ip_set_id_t */
+	__u8 dim;
+	__u8 flags;
+};
+
+extern struct ematch_util ipset_ematch_util;
+
+/* Open a raw socket, store the ipset version in *version; return fd or -1. */
+static int get_version(unsigned *version)
+{
+	int res, sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+	struct ip_set_req_version req_version;
+	socklen_t size = sizeof(req_version);
+
+	if (sockfd < 0) {
+		fputs("Can't open socket to ipset.\n", stderr);
+		return -1;
+	}
+
+	req_version.op = IP_SET_OP_VERSION;
+	res = getsockopt(sockfd, SOL_IP, SO_IP_SET, &req_version, &size);
+	if (res != 0) {
+		perror("xt_set getsockopt");
+		close(sockfd);	/* do not leak the socket on failure */
+		return -1;
+	}
+
+	*version = req_version.version;
+	return sockfd;
+}
+
+/* Send one SO_IP_SET request on a fresh socket; return 0 or -1 on error. */
+static int do_getsockopt(struct ip_set_req_get_set *req)
+{
+	int sockfd, res;
+	socklen_t size = sizeof(struct ip_set_req_get_set);
+	sockfd = get_version(&req->version);
+	if (sockfd < 0)
+		return -1;
+	res = getsockopt(sockfd, SOL_IP, SO_IP_SET, req, &size);
+	if (res != 0)
+		perror("Problem when communicating with ipset");
+	close(sockfd);
+	if (res != 0)
+		return -1;
+
+	if (size != sizeof(struct ip_set_req_get_set)) {
+		fprintf(stderr,
+			"Incorrect return size from kernel during ipset lookup, "
+			"(want %zu, got %zu)\n",
+			sizeof(struct ip_set_req_get_set), (size_t)size);
+		return -1;
+	}
+
+	return res;
+}
+
+/* Copy the name of set idx into setname (IPSET_MAXNAMELEN bytes); 0 or -1. */
+static int
+get_set_byid(char *setname, unsigned int idx)
+{
+	struct ip_set_req_get_set req;
+	int res;
+
+	req.op = IP_SET_OP_GET_BYINDEX;
+	req.set.index = idx;
+	res = do_getsockopt(&req);
+	if (res != 0)
+		return -1;
+	if (req.set.name[0] == '\0') {
+		fprintf(stderr,
+			"Set with index %u in kernel doesn't exist.\n", idx);
+		return -1;
+	}
+
+	strncpy(setname, req.set.name, IPSET_MAXNAMELEN);
+	setname[IPSET_MAXNAMELEN - 1] = '\0';	/* strncpy may not terminate */
+	return 0;
+}
+
+/* Look up setname and store its index in info->index; return 0 or -1. */
+static int
+get_set_byname(const char *setname, struct em_ipset_info *info)
+{
+	struct ip_set_req_get_set req;
+	int res;
+
+	req.op = IP_SET_OP_GET_BYNAME;
+	strncpy(req.set.name, setname, IPSET_MAXNAMELEN);
+	req.set.name[IPSET_MAXNAMELEN - 1] = '\0';
+	res = do_getsockopt(&req);
+	if (res != 0)
+		return -1;
+	if (req.set.index == IPSET_INVALID_ID)
+		return -1;
+	info->index = req.set.index;
+	return 0;
+}
+
+/* Parse a comma separated src/dst list into info->dim and info->flags. */
+static int
+parse_dirs(const char *opt_arg, struct em_ipset_info *info)
+{
+	char *saved = strdup(opt_arg);
+	char *ptr, *tmp = saved;
+
+	if (!tmp) {
+		perror("strdup");
+		return -1;
+	}
+
+	while (info->dim < IPSET_DIM_MAX && tmp != NULL) {
+		info->dim++;
+		ptr = strsep(&tmp, ",");
+		if (strncmp(ptr, "src", 3) == 0)
+			info->flags |= (1 << info->dim);
+		else if (strncmp(ptr, "dst", 3) != 0) {
+			fputs("You must specify (the comma separated list of) 'src' or 'dst'\n", stderr);
+			free(saved);
+			return -1;
+		}
+	}
+
+	if (tmp)
+		fprintf(stderr, "Can't be more src/dst options than %u\n", IPSET_DIM_MAX);
+	free(saved);
+	return tmp ? -1 : 0;
+}
+
+/* Print the usage text of the ipset ematch to fd. */
+static void ipset_print_usage(FILE *fd)
+{
+	fprintf(fd,
+		"Usage: ipset(SETNAME FLAGS)\n" \
+		"where: SETNAME:= string\n" \
+		"       FLAGS  := { FLAG[,FLAGS] }\n" \
+		"       FLAG   := { src | dst }\n" \
+		"\n" \
+		"Example: ipset(bulk src,dst)\n");
+}
+
+/* Parse "SETNAME FLAGS" and append the ematch header and set info to n. */
+static int ipset_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr,
+			    struct bstr *args)
+{
+	struct em_ipset_info set_info;
+	int ret;
+
+	memset(&set_info, 0, sizeof(set_info));
+
+#define PARSE_ERR(CARG, FMT, ARGS...) \
+	em_parse_error(EINVAL, args, CARG, &ipset_ematch_util, FMT ,##ARGS)
+
+	if (args == NULL)
+		return PARSE_ERR(args, "ipset: missing set name");
+
+	if (args->len >= IPSET_MAXNAMELEN)
+		return PARSE_ERR(args, "ipset: set name too long (max %u)", IPSET_MAXNAMELEN - 1);
+	ret = get_set_byname(args->data, &set_info);
+	if (ret < 0)
+		return PARSE_ERR(args, "ipset: unknown set name '%s'", args->data);
+
+	if (args->next == NULL)
+		return PARSE_ERR(args, "ipset: missing set flags");
+
+	args = bstr_next(args);
+	if (parse_dirs(args->data, &set_info))
+		return PARSE_ERR(args, "ipset: error parsing set flags");
+
+	if (args->next) {
+		args = bstr_next(args);
+		return PARSE_ERR(args, "ipset: unknown parameter");
+	}
+
+	addraw_l(n, MAX_MSG, hdr, sizeof(*hdr));
+	addraw_l(n, MAX_MSG, &set_info, sizeof(set_info));
+
+#undef PARSE_ERR
+	return 0;
+}
+
+/* Print the set name followed by its src/dst direction list to fd. */
+static int ipset_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data,
+			    int data_len)
+{
+	int i;
+	char setname[IPSET_MAXNAMELEN];
+	const struct em_ipset_info *set_info = data;
+
+	if (data_len != sizeof(*set_info)) {
+		fprintf(stderr, "xt_ipset_info struct size mismatch\n");
+		return -1;
+	}
+
+	if (get_set_byid(setname, set_info->index))
+		return -1;
+	fputs(setname, fd);
+	for (i = 1; i <= set_info->dim; i++) {
+		fprintf(fd, "%s%s", i == 1 ? " " : ",", set_info->flags & (1 << i) ? "src" : "dst");
+	}
+
+	return 0;
+}
+
+struct ematch_util ipset_ematch_util = {
+	.kind = "ipset",
+	.kind_num = TCF_EM_IPSET,
+	.parse_eopt = ipset_parse_eopt,
+	.print_eopt = ipset_print_eopt,
+	.print_usage = ipset_print_usage
+};