[PATCH nft 6/6] nft monitor [ trace ]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



"nft monitor trace" can now display nftables nftrace debug information.

$ nft filter input tcp dport 10000 nftrace set 1
$ nft filter input icmp type echo-request nftrace set 1
$ nft -nn monitor trace
trace id e1f5055f ip filter input packet: iif eth0 ether saddr 63:f6:4b:00:54:52 ether daddr c9:4b:a9:00:54:52 ip saddr 192.168.122.1 ip daddr 192.168.122.83 ip tos 0 ip ttl 64 ip id 32315 ip length 84 icmp type echo-request icmp code 0 icmp id 10087 icmp sequence 1
trace id e1f5055f ip filter input rule icmp type echo-request nftrace set 1 (verdict continue)
trace id e1f5055f ip filter input verdict continue
trace id e1f5055f ip filter input
trace id 74e47ad2 ip filter input packet: iif vlan0 ether saddr 63:f6:4b:00:54:52 ether daddr c9:4b:a9:00:54:52 vlan pcp 0 vlan cfi 1 vlan id 1000 ip saddr 10.0.0.1 ip daddr 10.0.0.2 ip tos 0 ip ttl 64 ip id 49030 ip length 84 icmp type echo-request icmp code 0 icmp id 10095 icmp sequence 1
trace id 74e47ad2 ip filter input rule icmp type echo-request nftrace set 1 (verdict continue)
trace id 74e47ad2 ip filter input verdict continue
trace id 74e47ad2 ip filter input
trace id 3030de23 ip filter input packet: iif vlan0 ether saddr 63:f6:4b:00:54:52 ether daddr c9:4b:a9:00:54:52 vlan pcp 0 vlan cfi 1 vlan id 1000 ip saddr 10.0.0.1 ip daddr 10.0.0.2 ip tos 16 ip ttl 64 ip id 59062 ip length 60 tcp sport 55438 tcp dport 10000 tcp flags == syn tcp window 29200
trace id 3030de23 ip filter input rule tcp dport 10000 nftrace set 1 (verdict continue)
trace id 3030de23 ip filter input verdict continue
trace id 3030de23 ip filter input

Based on a patch from Florian Westphal, which in turn was based on a patch
from Markus Kötter.

Signed-off-by: Patrick McHardy <kaber@xxxxxxxxx>
---
 include/payload.h |   1 +
 include/rule.h    |   1 +
 src/evaluate.c    |  18 ++++
 src/netlink.c     | 269 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/payload.c     |   5 +
 src/rule.c        |  61 +++++++++++--
 6 files changed, 345 insertions(+), 10 deletions(-)

diff --git a/include/payload.h b/include/payload.h
index b180ff5..37375c1 100644
--- a/include/payload.h
+++ b/include/payload.h
@@ -9,6 +9,7 @@ extern struct expr *payload_expr_alloc(const struct location *loc,
 				       unsigned int type);
 extern void payload_init_raw(struct expr *expr, enum proto_bases base,
 			     unsigned int offset, unsigned int len);
+extern unsigned int payload_hdr_field(const struct expr *expr);
 
 struct eval_ctx;
 struct stmt;
diff --git a/include/rule.h b/include/rule.h
index 6dbde13..09b3ff7 100644
--- a/include/rule.h
+++ b/include/rule.h
@@ -192,6 +192,7 @@ extern struct rule *rule_alloc(const struct location *loc,
 			       const struct handle *h);
 extern void rule_free(struct rule *rule);
 extern void rule_print(const struct rule *rule);
+extern struct rule *rule_lookup(const struct chain *chain, uint64_t handle);
 
 /**
  * enum set_flags
diff --git a/src/evaluate.c b/src/evaluate.c
index a65e145..63c0091 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -2479,6 +2479,7 @@ enum {
 	CMD_MONITOR_EVENT_ANY,
 	CMD_MONITOR_EVENT_NEW,
 	CMD_MONITOR_EVENT_DEL,
+	CMD_MONITOR_EVENT_TRACE,
 	CMD_MONITOR_EVENT_MAX
 };
 
@@ -2520,6 +2521,21 @@ static uint32_t monitor_flags[CMD_MONITOR_EVENT_MAX][CMD_MONITOR_OBJ_MAX] = {
 		[CMD_MONITOR_OBJ_SETS]		= (1 << NFT_MSG_DELSET),
 		[CMD_MONITOR_OBJ_ELEMS]		= (1 << NFT_MSG_DELSETELEM),
 	},
+	[CMD_MONITOR_EVENT_TRACE] = {
+		[CMD_MONITOR_OBJ_ANY]		= (1 << NFT_MSG_NEWTABLE) |
+						  (1 << NFT_MSG_NEWCHAIN) |
+						  (1 << NFT_MSG_NEWRULE)  |
+						  (1 << NFT_MSG_DELTABLE) |
+						  (1 << NFT_MSG_DELCHAIN) |
+						  (1 << NFT_MSG_DELRULE)  |
+						  (1 << NFT_MSG_TRACE),
+		[CMD_MONITOR_OBJ_TABLES]	= (1 << NFT_MSG_NEWTABLE) |
+						  (1 << NFT_MSG_DELTABLE),
+		[CMD_MONITOR_OBJ_CHAINS]	= (1 << NFT_MSG_NEWCHAIN) |
+						  (1 << NFT_MSG_DELCHAIN),
+		[CMD_MONITOR_OBJ_RULES]		= (1 << NFT_MSG_NEWRULE) |
+						  (1 << NFT_MSG_DELRULE),
+	},
 };
 
 static int cmd_evaluate_monitor(struct eval_ctx *ctx, struct cmd *cmd)
@@ -2537,6 +2553,8 @@ static int cmd_evaluate_monitor(struct eval_ctx *ctx, struct cmd *cmd)
 		event = CMD_MONITOR_EVENT_NEW;
 	else if (strcmp(cmd->monitor->event, "destroy") == 0)
 		event = CMD_MONITOR_EVENT_DEL;
+	else if (strcmp(cmd->monitor->event, "trace") == 0)
+		event = CMD_MONITOR_EVENT_TRACE;
 	else {
 		return monitor_error(ctx, cmd->monitor, "invalid event %s",
 				     cmd->monitor->event);
diff --git a/src/netlink.c b/src/netlink.c
index e3ba2ed..890e9b9 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -18,6 +18,7 @@
 #include <stdlib.h>
 
 #include <libnftnl/table.h>
+#include <libnftnl/trace.h>
 #include <libnftnl/chain.h>
 #include <libnftnl/expr.h>
 #include <libnftnl/set.h>
@@ -30,9 +31,11 @@
 #include <netlink.h>
 #include <mnl.h>
 #include <expression.h>
+#include <statement.h>
 #include <gmputil.h>
 #include <utils.h>
 #include <erec.h>
+#include <iface.h>
 
 static struct mnl_socket *nf_sock;
 static struct mnl_socket *nf_mon_sock;
@@ -2109,6 +2112,266 @@ static void netlink_events_cache_update(struct netlink_mon_handler *monh,
 	}
 }
 
+static void trace_print_hdr(const struct nftnl_trace *nlt)
+{
+	printf("trace id %08x ", nftnl_trace_get_u32(nlt, NFTNL_TRACE_ID));
+	printf("%s ", family2str(nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY)));
+	if (nftnl_trace_is_set(nlt, NFTNL_TRACE_TABLE))
+		printf("%s ", nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE));
+	if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CHAIN))
+		printf("%s ", nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN));
+}
+
+static void trace_print_expr(const struct nftnl_trace *nlt, unsigned int attr,
+			     struct expr *lhs)
+{
+	struct expr *rhs, *rel;
+	const void *data;
+	uint32_t len;
+
+	data = nftnl_trace_get_data(nlt, attr, &len);
+	rhs  = constant_expr_alloc(&netlink_location,
+				   lhs->dtype, lhs->byteorder,
+				   len * BITS_PER_BYTE, data);
+	rel  = relational_expr_alloc(&netlink_location, OP_EQ, lhs, rhs);
+
+	expr_print(rel);
+	printf(" ");
+	expr_free(rel);
+}
+
+static void trace_print_verdict(const struct nftnl_trace *nlt)
+{
+	const char *chain = NULL;
+	unsigned int verdict;
+	struct expr *expr;
+
+	verdict = nftnl_trace_get_u32(nlt, NFTNL_TRACE_VERDICT);
+	if (nftnl_trace_is_set(nlt, NFTNL_TRACE_JUMP_TARGET))
+		chain = xstrdup(nftnl_trace_get_str(nlt, NFTNL_TRACE_JUMP_TARGET));
+	expr = verdict_expr_alloc(&netlink_location, verdict, chain);
+
+	printf("verdict ");
+	expr_print(expr);
+	expr_free(expr);
+}
+
+static void trace_print_rule(const struct nftnl_trace *nlt)
+{
+	const struct table *table;
+	uint64_t rule_handle;
+	struct chain *chain;
+	struct rule *rule;
+	struct handle h;
+
+	h.family = nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY);
+	h.table  = nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE);
+	h.chain  = nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN);
+
+	if (!h.table)
+		return;
+
+	table = table_lookup(&h);
+	if (!table)
+		return;
+
+	chain = chain_lookup(table, &h);
+	if (!chain)
+		return;
+
+	rule_handle = nftnl_trace_get_u64(nlt, NFTNL_TRACE_RULE_HANDLE);
+	rule = rule_lookup(chain, rule_handle);
+	if (!rule)
+		return;
+
+	trace_print_hdr(nlt);
+	printf("rule ");
+	rule_print(rule);
+	printf(" (");
+	trace_print_verdict(nlt);
+	printf(")\n");
+}
+
+static void trace_gen_stmts(struct list_head *stmts,
+			    struct proto_ctx *ctx, struct payload_dep_ctx *pctx,
+			    const struct nftnl_trace *nlt, unsigned int attr,
+			    enum proto_bases base)
+{
+	struct list_head unordered = LIST_HEAD_INIT(unordered);
+	struct list_head list;
+	struct expr *rel, *lhs, *rhs, *tmp, *nexpr;
+	struct stmt *stmt;
+	const struct proto_desc *desc;
+	const void *hdr;
+	uint32_t hlen;
+	unsigned int n;
+	bool stacked;
+
+	if (!nftnl_trace_is_set(nlt, attr))
+		return;
+	hdr = nftnl_trace_get_data(nlt, attr, &hlen);
+
+	lhs = payload_expr_alloc(&netlink_location, NULL, 0);
+	payload_init_raw(lhs, base, 0, hlen * BITS_PER_BYTE);
+	rhs = constant_expr_alloc(&netlink_location,
+				  &invalid_type, BYTEORDER_INVALID,
+				  hlen * BITS_PER_BYTE, hdr);
+
+restart:
+	init_list_head(&list);
+	payload_expr_expand(&list, lhs, ctx);
+	expr_free(lhs);
+
+	desc = NULL;
+	list_for_each_entry_safe(lhs, nexpr, &list, list) {
+		if (desc && desc != ctx->protocol[base].desc) {
+			/* Chained protocols */
+			lhs->payload.offset = 0;
+			if (ctx->protocol[base].desc == NULL)
+				break;
+			goto restart;
+		}
+
+		tmp = constant_expr_splice(rhs, lhs->len);
+		expr_set_type(tmp, lhs->dtype, lhs->byteorder);
+
+		/* Skip unknown and filtered expressions */
+		desc = lhs->payload.desc;
+		if (lhs->dtype == &invalid_type ||
+		    desc->checksum_key == payload_hdr_field(lhs) ||
+		    desc->format.filter & (1 << payload_hdr_field(lhs))) {
+			expr_free(lhs);
+			expr_free(tmp);
+			continue;
+		}
+
+		rel  = relational_expr_alloc(&lhs->location, OP_EQ, lhs, tmp);
+		stmt = expr_stmt_alloc(&rel->location, rel);
+		list_add_tail(&stmt->list, &unordered);
+
+		desc = ctx->protocol[base].desc;
+		lhs->ops->pctx_update(ctx, rel);
+	}
+
+	expr_free(rhs);
+
+	n = 0;
+next:
+	list_for_each_entry(stmt, &unordered, list) {
+		rel = stmt->expr;
+		lhs = rel->left;
+
+		/* Move statements to result list in defined order */
+		desc = lhs->payload.desc;
+		if (desc->format.order[n] &&
+		    desc->format.order[n] != payload_hdr_field(lhs))
+			continue;
+
+		list_move_tail(&stmt->list, stmts);
+		n++;
+
+		stacked = payload_is_stacked(desc, rel);
+
+		if (lhs->flags & EXPR_F_PROTOCOL &&
+		    pctx->pbase == PROTO_BASE_INVALID) {
+			payload_dependency_store(pctx, stmt, base - stacked);
+		} else {
+			payload_dependency_kill(pctx, lhs);
+			if (lhs->flags & EXPR_F_PROTOCOL)
+				payload_dependency_store(pctx, stmt, base - stacked);
+		}
+
+		goto next;
+	}
+}
+
+static void trace_print_packet(const struct nftnl_trace *nlt)
+{
+	struct list_head stmts = LIST_HEAD_INIT(stmts);
+	struct payload_dep_ctx pctx = {};
+	struct proto_ctx ctx;
+	uint16_t dev_type;
+	struct stmt *stmt, *next;
+
+	trace_print_hdr(nlt);
+
+	printf("packet: ");
+	if (nftnl_trace_is_set(nlt, NFTNL_TRACE_IIF))
+		trace_print_expr(nlt, NFTNL_TRACE_IIF,
+				 meta_expr_alloc(&netlink_location,
+						 NFT_META_IIF));
+	if (nftnl_trace_is_set(nlt, NFTNL_TRACE_OIF))
+		trace_print_expr(nlt, NFTNL_TRACE_OIF,
+				 meta_expr_alloc(&netlink_location,
+						 NFT_META_OIF));
+
+	proto_ctx_init(&ctx, nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY));
+	if (ctx.protocol[PROTO_BASE_LL_HDR].desc == NULL &&
+	    nftnl_trace_is_set(nlt, NFTNL_TRACE_IIFTYPE)) {
+		dev_type = nftnl_trace_get_u16(nlt, NFTNL_TRACE_IIFTYPE);
+		proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location,
+				 proto_dev_desc(dev_type));
+	}
+
+	trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_LL_HEADER,
+			PROTO_BASE_LL_HDR);
+	trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_NETWORK_HEADER,
+			PROTO_BASE_NETWORK_HDR);
+	trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_TRANSPORT_HEADER,
+			PROTO_BASE_TRANSPORT_HDR);
+
+	list_for_each_entry_safe(stmt, next, &stmts, list) {
+		stmt_print(stmt);
+		printf(" ");
+		stmt_free(stmt);
+	}
+	printf("\n");
+}
+
+static int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
+				   struct netlink_mon_handler *monh)
+{
+	struct nftnl_trace *nlt;
+
+	assert(type == NFT_MSG_TRACE);
+
+	nlt = nftnl_trace_alloc();
+	if (!nlt)
+		memory_allocation_error();
+
+	if (nftnl_trace_nlmsg_parse(nlh, nlt) < 0)
+		netlink_abi_error();
+
+	switch (nftnl_trace_get_u32(nlt, NFTNL_TRACE_TYPE)) {
+	case NFT_TRACETYPE_RULE:
+		if (nftnl_trace_is_set(nlt, NFTNL_TRACE_LL_HEADER) ||
+		    nftnl_trace_is_set(nlt, NFTNL_TRACE_NETWORK_HEADER))
+			trace_print_packet(nlt);
+
+		if (nftnl_trace_is_set(nlt, NFTNL_TRACE_RULE_HANDLE))
+			trace_print_rule(nlt);
+		break;
+	case NFT_TRACETYPE_POLICY:
+	case NFT_TRACETYPE_RETURN:
+		trace_print_hdr(nlt);
+
+		if (nftnl_trace_is_set(nlt, NFTNL_TRACE_VERDICT)) {
+			trace_print_verdict(nlt);
+			printf(" ");
+		}
+
+		if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
+			trace_print_expr(nlt, NFTNL_TRACE_MARK,
+					 meta_expr_alloc(&netlink_location,
+							 NFT_META_MARK));
+		printf("\n");
+		break;
+	}
+
+	nftnl_trace_free(nlt);
+	return MNL_CB_OK;
+}
+
 static int netlink_events_cb(const struct nlmsghdr *nlh, void *data)
 {
 	int ret = MNL_CB_OK;
@@ -2141,6 +2404,9 @@ static int netlink_events_cb(const struct nlmsghdr *nlh, void *data)
 	case NFT_MSG_DELRULE:
 		ret = netlink_events_rule_cb(nlh, type, monh);
 		break;
+	case NFT_MSG_TRACE:
+		ret = netlink_events_trace_cb(nlh, type, monh);
+		break;
 	}
 	fflush(stdout);
 
@@ -2151,7 +2417,8 @@ int netlink_monitor(struct netlink_mon_handler *monhandler)
 {
 	netlink_open_mon_sock();
 
-	if (mnl_socket_bind(nf_mon_sock, (1 << (NFNLGRP_NFTABLES-1)),
+	if (mnl_socket_bind(nf_mon_sock, (1 << (NFNLGRP_NFTABLES-1)) |
+					 (1 << (NFNLGRP_NFTRACE-1)),
 			    MNL_SOCKET_AUTOPID) < 0)
 		return netlink_io_error(monhandler->ctx, monhandler->loc,
 					"Could not bind to netlink socket %s",
diff --git a/src/payload.c b/src/payload.c
index 0bbfb54..ac0e917 100644
--- a/src/payload.c
+++ b/src/payload.c
@@ -142,6 +142,11 @@ void payload_init_raw(struct expr *expr, enum proto_bases base,
 	expr->len		= len;
 }
 
+unsigned int payload_hdr_field(const struct expr *expr)
+{
+	return expr->payload.tmpl - expr->payload.desc->templates;
+}
+
 static void payload_stmt_print(const struct stmt *stmt)
 {
 	expr_print(stmt->payload.expr);
diff --git a/src/rule.c b/src/rule.c
index 2fe6745..cd7f84e 100644
--- a/src/rule.c
+++ b/src/rule.c
@@ -396,6 +396,17 @@ void rule_print(const struct rule *rule)
 		printf(" # handle %" PRIu64, rule->handle.handle.id);
 }
 
+struct rule *rule_lookup(const struct chain *chain, uint64_t handle)
+{
+	struct rule *rule;
+
+	list_for_each_entry(rule, &chain->rules, list) {
+		if (rule->handle.handle.id == handle)
+			return rule;
+	}
+	return NULL;
+}
+
 struct scope *scope_init(struct scope *scope, const struct scope *parent)
 {
 	scope->parent = parent;
@@ -1198,27 +1209,59 @@ static int do_command_rename(struct netlink_ctx *ctx, struct cmd *cmd)
 	return 0;
 }
 
-static int do_command_monitor(struct netlink_ctx *ctx, struct cmd *cmd)
+static bool need_cache(const struct cmd *cmd)
 {
-	struct table *t;
-	struct set *s;
-	struct netlink_mon_handler monhandler;
-
-	/* cache only needed if monitoring:
+	/*
 	 *  - new rules in default format
 	 *  - new elements
 	 */
 	if (((cmd->monitor->flags & (1 << NFT_MSG_NEWRULE)) &&
 	    (cmd->monitor->format == NFTNL_OUTPUT_DEFAULT)) ||
 	    (cmd->monitor->flags & (1 << NFT_MSG_NEWSETELEM)))
-		monhandler.cache_needed = true;
-	else
-		monhandler.cache_needed = false;
+		return true;
+
+	if (cmd->monitor->flags & (1 << NFT_MSG_TRACE))
+		return true;
 
+	return false;
+}
+
+static int do_command_monitor(struct netlink_ctx *ctx, struct cmd *cmd)
+{
+	struct table *t;
+	struct set *s;
+	struct netlink_mon_handler monhandler;
+
+	monhandler.cache_needed = need_cache(cmd);
 	if (monhandler.cache_needed) {
+		struct rule *rule, *nrule;
+		struct chain *chain;
+		int ret;
+
 		list_for_each_entry(t, &table_list, list) {
 			list_for_each_entry(s, &t->sets, list)
 				s->init = set_expr_alloc(&cmd->location);
+
+			if (!(cmd->monitor->flags & (1 << NFT_MSG_TRACE)))
+				continue;
+
+			/* When tracing we'd like to translate the rule handle
+			 * we receive in the trace messages to the actual rule
+			 * struct to print that out.  Populate rule cache now.
+			 */
+			ret = netlink_list_table(ctx, &t->handle,
+						 &internal_location);
+
+			if (ret != 0)
+				/* Shouldn't happen and doesn't break things
+				 * too badly
+				 */
+				continue;
+
+			list_for_each_entry_safe(rule, nrule, &ctx->list, list) {
+				chain = chain_lookup(t, &rule->handle);
+				list_move_tail(&rule->list, &chain->rules);
+			}
 		}
 	}
 
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Netfilter Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux