[PATCH nftables] rt: introduce routing expression

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Anders K. Pedersen <akp@xxxxxxxxxxxx>

Introduce rt expression for routing related data with support for nexthop
(i.e. the directly connected IP address that an outgoing packet is sent
to), which can be used either for matching or accounting, e.g.

 # nft add rule filter postrouting \
	ip daddr 192.168.1.0/24 rt ip nexthop != 192.168.0.1 drop

This will drop any traffic to 192.168.1.0/24 that is not routed via
192.168.0.1.

 # nft add rule filter postrouting \
	flow table acct { rt ip nexthop timeout 600s counter }
 # nft add rule ip6 filter postrouting \
	flow table acct { rt ip6 nexthop timeout 600s counter }

These rules count outgoing traffic per nexthop. Note that the timeout
releases an entry if no traffic is seen for this nexthop within 10 minutes.

 # nft add rule inet filter postrouting \
	ether type ip \
	flow table acct { rt ip nexthop timeout 600s counter }
 # nft add rule inet filter postrouting \
	ether type ip6 \
	flow table acct { rt ip6 nexthop timeout 600s counter }

Same as above, but via the inet family, where the ether type must be
specified explicitly.

"rt classid" is also implemented identical to "meta rtclassid", since it
is more logical to have this match in the routing expression going forward.

Signed-off-by: Anders K. Pedersen <akp@xxxxxxxxxxxx>
---
 doc/nft.xml                         | 106 ++++++++++++++++++++++++++++
 include/expression.h                |   8 +++
 include/linux/netfilter/nf_tables.h |  26 +++++++
 include/rt.h                        |  35 ++++++++++
 src/Makefile.am                     |   1 +
 src/evaluate.c                      |  27 ++++++++
 src/netlink_delinearize.c           |  18 +++++
 src/netlink_linearize.c             |  15 ++++
 src/parser_bison.y                  |  28 ++++++++
 src/rt.c                            | 135 ++++++++++++++++++++++++++++++++++++
 src/scanner.l                       |   3 +
 tests/files/expr-rt                 |  21 ++++++
 12 files changed, 423 insertions(+)

diff --git a/doc/nft.xml b/doc/nft.xml
index 3b215f8..5fc70b0 100644
--- a/doc/nft.xml
+++ b/doc/nft.xml
@@ -1222,6 +1222,112 @@ filter output oif eth0
 				</example>
 			</para>
 		</refsect2>
+
+		<refsect2>
+			<title>Routing expressions</title>
+			<para>
+				<cmdsynopsis>
+					<command>rt</command>
+					<group choice="req">
+						<arg>classid</arg>
+					</group>
+				</cmdsynopsis>
+				<cmdsynopsis>
+					<command>rt</command>
+					<group choice="req">
+						<arg>ip</arg>
+						<arg>ip6</arg>
+					</group>
+					<group choice="req">
+						<arg>nexthop</arg>
+					</group>
+				</cmdsynopsis>
+			</para>
+			<para>
+				A routing expression refers to routing data associated with a packet.
+			</para>
+			<para>
+				There are two types of routing expressions: unqualified and qualified routing
+				expressions. Qualified routing expressions require either the <command>ip</command> or the <command>ip6</command>
+				address family before the routing key, while unqualified routing expressions are
+				specified directly after the <command>rt</command> key.
+			</para>
+			<para>
+				<table frame="all">
+					<title>Routing expression types</title>
+					<tgroup cols='4' align='left' colsep='1' rowsep='1'>
+						<colspec colname='c1'/>
+						<colspec colname='c2'/>
+						<colspec colname='c3'/>
+						<colspec colname='c4'/>
+						<thead>
+							<row>
+								<entry>Address Family</entry>
+								<entry>Keyword</entry>
+								<entry>Description</entry>
+								<entry>Type</entry>
+							</row>
+						</thead>
+						<tbody>
+							<row>
+								<entry></entry>
+								<entry>classid</entry>
+								<entry>Routing realm</entry>
+								<entry>realm</entry>
+							</row>
+							<row>
+								<entry>ip</entry>
+								<entry>nexthop</entry>
+								<entry>Routing nexthop</entry>
+								<entry>ipv4_addr</entry>
+							</row>
+							<row>
+								<entry>ip6</entry>
+								<entry>nexthop</entry>
+								<entry>Routing nexthop</entry>
+								<entry>ipv6_addr</entry>
+							</row>
+						</tbody>
+					</tgroup>
+				</table>
+			</para>
+			<para>
+				<table frame="all">
+					<title>Routing expression specific types</title>
+					<tgroup cols='2' align='left' colsep='1' rowsep='1'>
+						<colspec colname='c1'/>
+						<colspec colname='c2'/>
+						<thead>
+							<row>
+								<entry>Type</entry>
+								<entry>Description</entry>
+							</row>
+						</thead>
+						<tbody>
+							<row>
+								<entry>realm</entry>
+								<entry>
+									Routing Realm (32 bit number). Can be specified numerically
+									or as symbolic name defined in /etc/iproute2/rt_realms.
+								</entry>
+							</row>
+						</tbody>
+					</tgroup>
+				</table>
+			</para>
+			<para>
+				<example>
+					<title>Using routing expressions</title>
+					<programlisting>
+# qualified routing expression
+filter output rt ip nexthop 192.168.0.1
+
+# unqualified routing expression
+filter output rt classid 10
+					</programlisting>
+				</example>
+			</para>
+		</refsect2>
 	</refsect1>
 
 	<refsect1>
diff --git a/include/expression.h b/include/expression.h
index 13ca315..91979c4 100644
--- a/include/expression.h
+++ b/include/expression.h
@@ -35,6 +35,7 @@
  * @EXPR_RELATIONAL:	equality and relational expressions
  * @EXPR_NUMGEN:	number generation expression
  * @EXPR_HASH:		hash expression
+ * @EXPR_RT:		routing expression
  */
 enum expr_types {
 	EXPR_INVALID,
@@ -59,6 +60,7 @@ enum expr_types {
 	EXPR_RELATIONAL,
 	EXPR_NUMGEN,
 	EXPR_HASH,
+	EXPR_RT,
 };
 
 enum ops {
@@ -180,6 +182,7 @@ enum expr_flags {
 #include <exthdr.h>
 #include <numgen.h>
 #include <meta.h>
+#include <rt.h>
 #include <hash.h>
 #include <ct.h>
 
@@ -283,6 +286,11 @@ struct expr {
 			enum proto_bases	base;
 		} meta;
 		struct {
+			/* EXPR_RT */
+			enum nft_rt_keys	key;
+			uint32_t		family;
+		} rt;
+		struct {
 			/* EXPR_CT */
 			enum nft_ct_keys	key;
 			int8_t			direction;
diff --git a/include/linux/netfilter/nf_tables.h b/include/linux/netfilter/nf_tables.h
index 1bec149..f80ecb8 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -724,6 +724,16 @@ enum nft_meta_keys {
 };
 
 /**
+ * enum nft_rt_keys - nf_tables routing expression keys
+ *
+ * @NFT_RT_NEXTHOP: routing nexthop
+ */
+enum nft_rt_keys {
+	NFT_RT_CLASSID,
+	NFT_RT_NEXTHOP,
+};
+
+/**
  * enum nft_hash_attributes - nf_tables hash expression netlink attributes
  *
  * @NFTA_HASH_SREG: source register (NLA_U32)
@@ -760,6 +770,22 @@ enum nft_meta_attributes {
 #define NFTA_META_MAX		(__NFTA_META_MAX - 1)
 
 /**
+ * enum nft_rt_attributes - nf_tables routing expression netlink attributes
+ *
+ * @NFTA_RT_DREG: destination register (NLA_U32)
+ * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys)
+ * @NFTA_RT_FAMILY: Address family (NLA_U32)
+ */
+enum nft_rt_attributes {
+	NFTA_RT_UNSPEC,
+	NFTA_RT_DREG,
+	NFTA_RT_KEY,
+	NFTA_RT_FAMILY,
+	__NFTA_RT_MAX
+};
+#define NFTA_RT_MAX		(__NFTA_RT_MAX - 1)
+
+/**
  * enum nft_ct_keys - nf_tables ct expression keys
  *
  * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
diff --git a/include/rt.h b/include/rt.h
new file mode 100644
index 0000000..395a1df
--- /dev/null
+++ b/include/rt.h
@@ -0,0 +1,35 @@
+#ifndef NFTABLES_RT_H
+#define NFTABLES_RT_H
+
+/**
+ * struct rt_template - template for routing expressions
+ *
+ * @token:	parser token for the expression
+ * @dtype_ip:	data type of the ip expression
+ * @dtype_ip6:	data type of the ip6 expression
+ * @len_ip:	length of the ip expression
+ * @len_ip6:	length of the ip6 expression
+ * @byteorder:	byteorder
+ */
+struct rt_template {
+	const char		*token;
+	const struct datatype	*dtype_ip, *dtype_ip6;
+	unsigned int		len_ip, len_ip6;
+	enum byteorder		byteorder;
+};
+
+#define RT_TEMPLATE(__token, __dtype_ip, __dtype_ip6,	\
+		    __len_ip, __len_ip6, __byteorder) {	\
+	.token		= (__token),			\
+	.dtype_ip	= (__dtype_ip),			\
+	.dtype_ip6	= (__dtype_ip6),		\
+	.len_ip		= (__len_ip),			\
+	.len_ip6	= (__len_ip6),			\
+	.byteorder	= (__byteorder),		\
+}
+
+extern struct expr *rt_expr_alloc(const struct location *loc,
+				  enum nft_rt_keys key,
+				  int family);
+
+#endif /* NFTABLES_RT_H */
diff --git a/src/Makefile.am b/src/Makefile.am
index 63bbef2..9a151bd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -38,6 +38,7 @@ nft_SOURCES =	main.c				\
 		exthdr.c			\
 		hash.c				\
 		meta.c				\
+		rt.c				\
 		numgen.c			\
 		ct.c				\
 		netlink.c			\
diff --git a/src/evaluate.c b/src/evaluate.c
index 45af329..f5dbaa2 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -612,6 +612,31 @@ static int expr_evaluate_payload(struct eval_ctx *ctx, struct expr **exprp)
 }
 
 /*
+ * RT expression: validate protocol dependencies.
+ */
+static int expr_evaluate_rt(struct eval_ctx *ctx, struct expr **expr)
+{
+	const struct proto_desc *base;
+	struct expr *rt = *expr;
+
+	base = ctx->pctx.protocol[PROTO_BASE_NETWORK_HDR].desc;
+
+	if ((rt->rt.family == NFPROTO_IPV4 && base == &proto_ip) ||
+	    (rt->rt.family == NFPROTO_IPV6 && base == &proto_ip6) ||
+	    rt->rt.family == NFPROTO_UNSPEC)
+		return expr_evaluate_primary(ctx, expr);
+
+	if (base)
+		return expr_error(ctx->msgs, rt,
+				  "routing family %s cannot be used with %s",
+				  family2str(rt->rt.family), base->name);
+
+	return expr_error(ctx->msgs, rt,
+			  "ether type must be specified "
+			  "before routing family");
+}
+
+/*
  * CT expression: update the protocol dependant types bases on the protocol
  * context.
  */
@@ -1609,6 +1634,8 @@ static int expr_evaluate(struct eval_ctx *ctx, struct expr **expr)
 		return expr_evaluate_primary(ctx, expr);
 	case EXPR_PAYLOAD:
 		return expr_evaluate_payload(ctx, expr);
+	case EXPR_RT:
+		return expr_evaluate_rt(ctx, expr);
 	case EXPR_CT:
 		return expr_evaluate_ct(ctx, expr);
 	case EXPR_PREFIX:
diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index 6bb27b6..6e4f5d0 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -545,6 +545,22 @@ static void netlink_parse_meta(struct netlink_parse_ctx *ctx,
 		netlink_parse_meta_stmt(ctx, loc, nle);
 }
 
+static void netlink_parse_rt(struct netlink_parse_ctx *ctx,
+			     const struct location *loc,
+			     const struct nftnl_expr *nle)
+{
+	enum nft_registers dreg;
+	uint32_t key, fam;
+	struct expr *expr;
+
+	key  = nftnl_expr_get_u32(nle, NFTNL_EXPR_RT_KEY);
+	fam  = nftnl_expr_get_u32(nle, NFTNL_EXPR_RT_FAMILY);
+	expr = rt_expr_alloc(loc, key, fam);
+
+	dreg = netlink_parse_register(nle, NFTNL_EXPR_RT_DREG);
+	netlink_set_register(ctx, dreg, expr);
+}
+
 static void netlink_parse_numgen(struct netlink_parse_ctx *ctx,
 				 const struct location *loc,
 				 const struct nftnl_expr *nle)
@@ -1045,6 +1061,7 @@ static const struct {
 	{ .name = "payload",	.parse = netlink_parse_payload },
 	{ .name = "exthdr",	.parse = netlink_parse_exthdr },
 	{ .name = "meta",	.parse = netlink_parse_meta },
+	{ .name = "rt",		.parse = netlink_parse_rt },
 	{ .name = "ct",		.parse = netlink_parse_ct },
 	{ .name = "counter",	.parse = netlink_parse_counter },
 	{ .name = "log",	.parse = netlink_parse_log },
@@ -1678,6 +1695,7 @@ static void expr_postprocess(struct rule_pp_ctx *ctx, struct expr **exprp)
 		break;
 	case EXPR_SET_REF:
 	case EXPR_META:
+	case EXPR_RT:
 	case EXPR_VERDICT:
 	case EXPR_NUMGEN:
 		break;
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index 558deb2..edcd35b 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -172,6 +172,19 @@ static void netlink_gen_meta(struct netlink_linearize_ctx *ctx,
 	nftnl_rule_add_expr(ctx->nlr, nle);
 }
 
+static void netlink_gen_rt(struct netlink_linearize_ctx *ctx,
+			     const struct expr *expr,
+			     enum nft_registers dreg)
+{
+	struct nftnl_expr *nle;
+
+	nle = alloc_nft_expr("rt");
+	netlink_put_register(nle, NFTNL_EXPR_RT_DREG, dreg);
+	nftnl_expr_set_u32(nle, NFTNL_EXPR_RT_KEY, expr->rt.key);
+	nftnl_expr_set_u32(nle, NFTNL_EXPR_RT_FAMILY, expr->rt.family);
+	nftnl_rule_add_expr(ctx->nlr, nle);
+}
+
 static void netlink_gen_numgen(struct netlink_linearize_ctx *ctx,
 			    const struct expr *expr,
 			    enum nft_registers dreg)
@@ -644,6 +657,8 @@ static void netlink_gen_expr(struct netlink_linearize_ctx *ctx,
 		return netlink_gen_exthdr(ctx, expr, dreg);
 	case EXPR_META:
 		return netlink_gen_meta(ctx, expr, dreg);
+	case EXPR_RT:
+		return netlink_gen_rt(ctx, expr, dreg);
 	case EXPR_CT:
 		return netlink_gen_ct(ctx, expr, dreg);
 	case EXPR_SET_ELEM:
diff --git a/src/parser_bison.y b/src/parser_bison.y
index aac10dc..cb95c9b 100644
--- a/src/parser_bison.y
+++ b/src/parser_bison.y
@@ -340,6 +340,9 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %token OIFGROUP			"oifgroup"
 %token CGROUP			"cgroup"
 
+%token CLASSID			"classid"
+%token NEXTHOP			"nexthop"
+
 %token CT			"ct"
 %token DIRECTION		"direction"
 %token STATE			"state"
@@ -591,6 +594,10 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %destructor { expr_free($$); }	meta_expr
 %type <val>			meta_key	meta_key_qualified	meta_key_unqualified	numgen_type
 
+%type <expr>			rt_expr
+%destructor { expr_free($$); }	rt_expr
+%type <val>			rt_family	rt_key_qualified	rt_key_unqualified
+
 %type <expr>			ct_expr
 %destructor { expr_free($$); }	ct_expr
 %type <val>			ct_key		ct_key_dir	ct_key_counters
@@ -1980,6 +1987,7 @@ primary_expr		:	symbol_expr			{ $$ = $1; }
 			|	payload_expr			{ $$ = $1; }
 			|	exthdr_expr			{ $$ = $1; }
 			|	meta_expr			{ $$ = $1; }
+			|	rt_expr				{ $$ = $1; }
 			|	ct_expr				{ $$ = $1; }
 			|	numgen_expr			{ $$ = $1; }
 			|	hash_expr			{ $$ = $1; }
@@ -2470,6 +2478,26 @@ meta_stmt		:	META	meta_key	SET	expr
 			}
 			;
 
+rt_expr			:	RT	rt_family	rt_key_qualified
+			{
+				$$ = rt_expr_alloc(&@$, $3, $2);
+			}
+			|	RT	rt_key_unqualified
+			{
+				$$ = rt_expr_alloc(&@$, $2, NFPROTO_UNSPEC);
+			}
+			;
+
+rt_family		:	IP		{ $$ = NFPROTO_IPV4; }
+			|	IP6		{ $$ = NFPROTO_IPV6; }
+			;
+
+rt_key_qualified	:	NEXTHOP		{ $$ = NFT_RT_NEXTHOP; }
+			;
+
+rt_key_unqualified	:	CLASSID		{ $$ = NFT_RT_CLASSID; }
+			;
+
 numgen_type		:	INC		{ $$ = NFT_NG_INCREMENTAL; }
 			|	RANDOM		{ $$ = NFT_NG_RANDOM; }
 			;
diff --git a/src/rt.c b/src/rt.c
new file mode 100644
index 0000000..cbf0981
--- /dev/null
+++ b/src/rt.c
@@ -0,0 +1,135 @@
+/*
+ * Routing expression related definition and types.
+ *
+ * Copyright (c) 2016 Anders K. Pedersen <akp@xxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/netfilter.h>
+
+#include <nftables.h>
+#include <expression.h>
+#include <datatype.h>
+#include <rt.h>
+#include <rule.h>
+
+static struct symbol_table *realm_tbl;
+static void __init realm_table_init(void)
+{
+	realm_tbl = rt_symbol_table_init("/etc/iproute2/rt_realms");
+}
+
+static void __exit realm_table_exit(void)
+{
+	rt_symbol_table_free(realm_tbl);
+}
+
+static void realm_type_print(const struct expr *expr)
+{
+	return symbolic_constant_print(realm_tbl, expr, true);
+}
+
+static struct error_record *realm_type_parse(const struct expr *sym,
+					     struct expr **res)
+{
+	return symbolic_constant_parse(sym, realm_tbl, res);
+}
+
+static const struct datatype realm_type = {
+	.type		= TYPE_REALM,
+	.name		= "realm",
+	.desc		= "routing realm",
+	.byteorder	= BYTEORDER_HOST_ENDIAN,
+	.size		= 4 * BITS_PER_BYTE,
+	.basetype	= &integer_type,
+	.print		= realm_type_print,
+	.parse		= realm_type_parse,
+	.flags		= DTYPE_F_PREFIX,
+};
+
+static const struct rt_template rt_templates[] = {
+	[NFT_RT_CLASSID]	= RT_TEMPLATE("classid",
+					      &realm_type,
+					      &realm_type,
+					      4 * BITS_PER_BYTE,
+					      4 * BITS_PER_BYTE,
+					      BYTEORDER_HOST_ENDIAN),
+	[NFT_RT_NEXTHOP]	= RT_TEMPLATE("nexthop",
+					      &ipaddr_type,
+					      &ip6addr_type,
+					      4 * BITS_PER_BYTE,
+					      16 * BITS_PER_BYTE,
+					      BYTEORDER_BIG_ENDIAN),
+};
+
+static bool rt_key_is_qualified(enum nft_rt_keys key)
+{
+	switch (key) {
+	case NFT_RT_NEXTHOP:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void rt_expr_print(const struct expr *expr)
+{
+	if (rt_key_is_qualified(expr->rt.key))
+		printf("rt %s %s", family2str(expr->rt.family),
+		       rt_templates[expr->rt.key].token);
+	else
+		printf("rt %s", rt_templates[expr->rt.key].token);
+}
+
+static bool rt_expr_cmp(const struct expr *e1, const struct expr *e2)
+{
+	return e1->rt.key == e2->rt.key &&
+	       e1->rt.family == e2->rt.family;
+}
+
+static void rt_expr_clone(struct expr *new, const struct expr *expr)
+{
+	new->rt.key = expr->rt.key;
+	new->rt.family = expr->rt.family;
+}
+
+static const struct expr_ops rt_expr_ops = {
+	.type		= EXPR_RT,
+	.name		= "rt",
+	.print		= rt_expr_print,
+	.cmp		= rt_expr_cmp,
+	.clone		= rt_expr_clone,
+};
+
+struct expr *rt_expr_alloc(const struct location *loc, enum nft_rt_keys key,
+			   int family)
+{
+	const struct rt_template *tmpl = &rt_templates[key];
+	struct expr *expr;
+
+	if (family == NFPROTO_IPV4)
+		expr = expr_alloc(loc, &rt_expr_ops, tmpl->dtype_ip,
+				  tmpl->byteorder, tmpl->len_ip);
+	else
+		expr = expr_alloc(loc, &rt_expr_ops, tmpl->dtype_ip6,
+				  tmpl->byteorder, tmpl->len_ip6);
+	expr->rt.key = key;
+	expr->rt.family = family;
+
+	return expr;
+}
+
+static void __init rt_init(void)
+{
+	datatype_register(&realm_type);
+}
diff --git a/src/scanner.l b/src/scanner.l
index 8b5a383..88f36d2 100644
--- a/src/scanner.l
+++ b/src/scanner.l
@@ -456,6 +456,9 @@ addrstring	({macaddr}|{ip4addr}|{ip6addr})
 "oifgroup"		{ return OIFGROUP; }
 "cgroup"		{ return CGROUP; }
 
+"classid"		{ return CLASSID; }
+"nexthop"		{ return NEXTHOP; }
+
 "ct"			{ return CT; }
 "direction"		{ return DIRECTION; }
 "state"			{ return STATE; }
diff --git a/tests/files/expr-rt b/tests/files/expr-rt
new file mode 100644
index 0000000..0f35811
--- /dev/null
+++ b/tests/files/expr-rt
@@ -0,0 +1,21 @@
+#! nft -f
+
+add table ip filter
+add chain ip filter output { type filter hook output priority 0 ; }
+
+add table ip6 filter
+add chain ip6 filter output { type filter hook output priority 0 ; }
+
+add table inet filter
+add chain inet filter output { type filter hook output priority 0 ; }
+
+# rt: classid (see /etc/iproute2/rt_realms)
+add rule ip filter output rt classid cosmos counter
+add rule ip6 filter output rt classid cosmos counter
+add rule inet filter output rt classid cosmos counter
+
+# rt: nexthop
+add rule ip filter output rt ip nexthop 127.0.0.1 counter
+add rule ip6 filter output rt ip6 nexthop ::1 counter
+add rule inet filter output ether type ip rt ip nexthop 127.0.0.1 counter
+add rule inet filter output ether type ip6 rt ip6 nexthop ::1 counter
��.n��������+%������w��{.n����z��׫���n�r������&��z�ޗ�zf���h���~����������_��+v���)ߣ�

[Index of Archives]     [Netfilter Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux