[RFC] nft trace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

the only thing which I do not like about
https://github.com/commonism/iptables-trace
is the timing.
Since it was released more than 10 years after iptables had its initial release, I decided to address nft in time.

So, I propose to add a new command to nft - "trace".

nft trace uses libnetfilter_log to receive log messages, and gathers information from messages prefixed with "TRACE: ". It looks up the chain, rule, and action, and prints a human-readable representation.

such as

IN=eth0 OUT= SRC=141.30.13.10 DST=10.0.2.15 LEN=1408 TOS=0x00 PREC=0x00 TTL=64 ID=60563
        filter input (#2) NFMARK=0x0
                 ip protocol icmp nftrace set 1
        filter input (#3) NFMARK=0x0
                 icmp type { echo-request, echo-reply} counter packets 5 bytes 420
        filter input (#4) NFMARK=0x0
                 ip protocol icmp jump test-0-0
        filter test-0-0 (#1) NFMARK=0x1
                 mark set 0x00000001
        filter test-0-0 (#3) NFMARK=0x2
                 mark set 0x00000002
        filter test-0-0 NFMARK=0x2
                 => RETURN
        filter input (#5) NFMARK=0x2
                 ip protocol icmp goto test-1-0
        filter test-1-0 (#1) NFMARK=0x2
                 ip protocol vmap { tcp : drop, icmp : jump test-0-0}
        filter test-0-0 (#1) NFMARK=0x1
                 mark set 0x00000001
        filter test-0-0 (#3) NFMARK=0x2
                 mark set 0x00000002
        filter test-0-0 NFMARK=0x2
                 => RETURN
        filter test-1-0 NFMARK=0x2
                 => RETURN
        filter input NFMARK=0x2
                ACCEPT


So you can set an nftrace meta on a packet you want to debug, and use "nft trace" to follow the packet on its way through the kernel. You can use "limit" to limit the number of packets traced to 1 packet per second, making things smooth and easy to follow and avoiding overlaps when printing the trace. The start of a trace is detected by looking for an nftrace meta on the currently matching rule; if it is set, the packet header can be printed.

I really recommend such trace functionality; it is of great use to me when using iptables. And as I doubt nftables rulesets will get any easier to debug, I'm looking to get trace into nft. In most cases it is enough to know the IP address of the service which does not work, set up a trace rule, follow the packet, and see where it gets lost. iptables-trace basically changed the way I write/adjust rules, and I want to keep it this way with nft.

None of this works with current kernels, as changes to the netlink logging infrastructure in v3.17 prevent TRACE messages from being relayed to netlink.
I filed a report here:
http://bugzilla.netfilter.org/show_bug.cgi?id=1003

Using NULL as nf_loginfo works for me (messages end up in group 0 as before), but in order to get first-class trace functionality I'm looking to add NFLOG arguments to TRACE, so you can set a group and have the messages be of type ULOG in the kernel and get relayed to userspace. For compatibility, TRACE without additional arguments can still be of type LOG, as it was before. Having NFLOG groups would even allow multiple "nft trace" instances to run without interfering with each other, each instance addressing a different group.

Making this change in a compatible way for xtables/iptables/ip6tables/nftables requires quite some work on userspace and kernel space for all of the software providing TRACE functionality.

Attached is a patch against current nftables which can be used as a technical demo; it is ugly, incomplete and rough, but short, and it will do as something to look at. To use this, I patched my kernel to log TRACE messages using NULL as nf_loginfo as mentioned before.

I'd love a response regarding the odds of getting the nft trace functionality merged - I promise to continue working on it and clean things up, and I'll take care that it is optional and does not tie nft to libnetfilter_log (e.g. for embedded platforms). Same for changing TRACE to accept arguments - fixing the regression by using NULL as nf_loginfo for trace is trivial but ... having NFLOG groups in TRACE can be really handy.



MfG
Markus Kötter
diff --git a/include/rule.h b/include/rule.h
index 491411e..325ac0f 100644
--- a/include/rule.h
+++ b/include/rule.h
@@ -119,6 +119,7 @@ struct chain {
 	const char		*type;
 	struct scope		scope;
 	struct list_head	rules;
+	uint32_t		policy;
 };
 
 extern const char *chain_type_name_lookup(const char *name);
@@ -224,6 +225,7 @@ extern void set_print_plain(const struct set *s);
  * @CMD_EXPORT:		export the ruleset in a given format
  * @CMD_MONITOR:	event listener
  * @CMD_DESCRIBE:	describe an expression
+ * @CMD_TRACE:		print the packets trace
  */
 enum cmd_ops {
 	CMD_INVALID,
@@ -237,6 +239,7 @@ enum cmd_ops {
 	CMD_EXPORT,
 	CMD_MONITOR,
 	CMD_DESCRIBE,
+	CMD_TRACE,
 };
 
 /**
@@ -253,6 +256,7 @@ enum cmd_ops {
  * @CMD_OBJ_EXPR:	expression
  * @CMD_OBJ_MONITOR:	monitor
  * @CMD_OBJ_EXPORT:	export
+ * @CMD_OBJ_TRACE:	trace
  */
 enum cmd_obj {
 	CMD_OBJ_INVALID,
@@ -266,6 +270,7 @@ enum cmd_obj {
 	CMD_OBJ_EXPR,
 	CMD_OBJ_MONITOR,
 	CMD_OBJ_EXPORT,
+	CMD_OBJ_TRACE,
 };
 
 struct export {
@@ -296,6 +301,14 @@ struct monitor {
 struct monitor *monitor_alloc(uint32_t format, uint32_t type, const char *event);
 void monitor_free(struct monitor *m);
 
+/* struct trace - "trace" command object; @location is set by the parser */
+struct trace {
+	struct location	location;
+};
+
+struct trace *trace_alloc(void);
+void trace_free(struct trace *m);
+
 /**
  * struct cmd - command statement
  *
@@ -325,6 +338,7 @@ struct cmd {
 		struct table	*table;
 		struct monitor	*monitor;
 		struct export	*export;
+		struct trace	*trace;
 	};
 	const void		*arg;
 };
diff --git a/src/Makefile.am b/src/Makefile.am
index 2410fd3..2123013 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -57,4 +57,4 @@ mini-gmp.o: AM_CFLAGS += -Wno-sign-compare
 nft_SOURCES +=	mini-gmp.c
 endif
 
-nft_LDADD	= ${LIBMNL_LIBS} ${LIBNFTNL_LIBS}
+nft_LDADD	= ${LIBMNL_LIBS} ${LIBNFTNL_LIBS} -lnetfilter_log
diff --git a/src/evaluate.c b/src/evaluate.c
index a3484c6..6f7913d 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -1971,6 +1971,8 @@ int cmd_evaluate(struct eval_ctx *ctx, struct cmd *cmd)
 		return 0;
 	case CMD_MONITOR:
 		return cmd_evaluate_monitor(ctx, cmd);
+	case CMD_TRACE:
+		return 0; //cmd_evaluate_trace(ctx, cmd);
 	default:
 		BUG("invalid command operation %u\n", cmd->op);
 	};
diff --git a/src/netlink.c b/src/netlink.c
index 84d9d27..17aabd4 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -675,6 +675,10 @@ static struct chain *netlink_delinearize_chain(struct netlink_ctx *ctx,
 		chain->flags        |= CHAIN_F_BASECHAIN;
 	}
 
+	/* record the chain policy only when that attribute is present */
+	if (nft_chain_attr_is_set(nlc, NFT_CHAIN_ATTR_POLICY))
+		chain->policy = nft_chain_attr_get_u32(nlc, NFT_CHAIN_ATTR_POLICY);
+
 	return chain;
 }
 
diff --git a/src/parser_bison.y b/src/parser_bison.y
index fd2407c..447f07b 100644
--- a/src/parser_bison.y
+++ b/src/parser_bison.y
@@ -190,6 +190,7 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %token DESCRIBE			"describe"
 %token EXPORT			"export"
 %token MONITOR			"monitor"
+%token TRACE			"trace"
 
 %token ACCEPT			"accept"
 %token DROP			"drop"
@@ -402,8 +403,8 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %type <cmd>			line
 %destructor { cmd_free($$); }	line
 
-%type <cmd>			base_cmd add_cmd create_cmd insert_cmd delete_cmd list_cmd flush_cmd rename_cmd export_cmd monitor_cmd describe_cmd
-%destructor { cmd_free($$); }	base_cmd add_cmd create_cmd insert_cmd delete_cmd list_cmd flush_cmd rename_cmd export_cmd monitor_cmd describe_cmd
+%type <cmd>			base_cmd add_cmd create_cmd insert_cmd delete_cmd list_cmd flush_cmd rename_cmd export_cmd monitor_cmd describe_cmd trace_cmd
+%destructor { cmd_free($$); }	base_cmd add_cmd create_cmd insert_cmd delete_cmd list_cmd flush_cmd rename_cmd export_cmd monitor_cmd describe_cmd trace_cmd
 
 %type <handle>			table_spec tables_spec chain_spec chain_identifier ruleid_spec ruleset_spec
 %destructor { handle_free(&$$); } table_spec tables_spec chain_spec chain_identifier ruleid_spec ruleset_spec
@@ -640,6 +641,7 @@ base_cmd		:	/* empty */	add_cmd		{ $$ = $1; }
 			|	EXPORT		export_cmd	{ $$ = $2; }
 			|	MONITOR		monitor_cmd	{ $$ = $2; }
 			|	DESCRIBE	describe_cmd	{ $$ = $2; }
+			|	TRACE		trace_cmd	{ $$ = $2; }
 			;
 
 add_cmd			:	TABLE		table_spec
@@ -809,6 +811,16 @@ export_cmd		:	export_format
 			}
 			;
 
+/* "trace" takes no arguments: the empty rule only builds a CMD_TRACE command */
+trace_cmd	:
+			{
+				struct handle h = { .family = NFPROTO_UNSPEC };
+				struct trace *t = trace_alloc();
+				t->location = @-1;	/* NOTE(review): index -1 lies below this rule's own symbols; confirm */
+				$$ = cmd_alloc(CMD_TRACE, CMD_OBJ_TRACE, &h, &@$, t);
+			}
+			;
+
 monitor_cmd		:	monitor_event	monitor_object	monitor_format
 			{
 				struct handle h = { .family = NFPROTO_UNSPEC };
diff --git a/src/rule.c b/src/rule.c
index 8d76fd0..9cb8715 100644
--- a/src/rule.c
+++ b/src/rule.c
@@ -573,6 +573,19 @@ void monitor_free(struct monitor *m)
 	xfree(m);
 }
 
+/* Allocate a trace command object; no member is set here. */
+struct trace *trace_alloc(void)
+{
+	return xmalloc(sizeof(struct trace));
+}
+
+/* Counterpart of trace_alloc(): hands @tr to xfree().  cmd_free() calls
+ * this for CMD_OBJ_TRACE commands. */
+void trace_free(struct trace *tr)
+{
+	xfree(tr);
+}
+
 void cmd_free(struct cmd *cmd)
 {
 	handle_free(&cmd->handle);
@@ -602,6 +615,9 @@ void cmd_free(struct cmd *cmd)
 		case CMD_OBJ_EXPORT:
 			export_free(cmd->export);
 			break;
+		case CMD_OBJ_TRACE:
+			trace_free(cmd->trace);
+			break;
 		default:
 			BUG("invalid command object type %u\n", cmd->obj);
 		}
@@ -945,6 +961,251 @@ static int do_command_rename(struct netlink_ctx *ctx, struct cmd *cmd)
 	return 0;
 }
 
+#include <libnetfilter_log/libnetfilter_log.h>
+#include "netlink.h"
+#include "mnl.h"
+
+/*
+ * Fields of a kernel TRACE log prefix, "TRACE: TABLE:CHAIN:ACTION:NUM ".
+ * All members point into the buffer handed to parse_trace_prefix().
+ */
+struct trace_prefix
+{
+	char *table;
+	char *chain;
+	char *action;
+	char *num;
+};
+
+/**
+ * parse_trace_prefix - split a TRACE log prefix into its fields
+ *
+ * @prefix:	writable NUL-terminated prefix; separators are overwritten
+ * @td:		receives pointers to the fields inside @prefix
+ *
+ * NUM and ACTION are cut off from the right and TABLE from the left, so the
+ * chain name may itself contain ':'.  NUM keeps any trailing blank.
+ *
+ * Returns 0 on success, -1 if the "TRACE: " tag or a separator is missing
+ * (@prefix may already have been modified in the latter case).
+ */
+static int parse_trace_prefix(char *prefix, struct trace_prefix *td)
+{
+	static const char tag[] = "TRACE: ";
+	const size_t taglen = sizeof(tag) - 1;
+	char *pos, *sep;
+
+	if (strncmp(prefix, tag, taglen) != 0)
+		return -1;
+	pos = prefix + taglen;
+
+	/* NUM: everything behind the last ':' */
+	sep = strrchr(pos, ':');
+	if (sep == NULL)
+		return -1;
+	*sep = '\0';
+	td->num = sep + 1;
+
+	/*
+	 * ACTION: behind the last ':' of what is left.  strrchr() stops at
+	 * the NUL just written, so no length arithmetic can underflow.
+	 */
+	sep = strrchr(pos, ':');
+	if (sep == NULL)
+		return -1;
+	*sep = '\0';
+	td->action = sep + 1;
+
+	/* TABLE ends at the first ':'; the chain name follows it */
+	sep = strchr(pos, ':');
+	if (sep == NULL)
+		return -1;
+	*sep = '\0';
+	td->table = pos;
+	td->chain = sep + 1;
+
+	return 0;
+}
+
+/*
+ * Populate the table/set/chain/rule cache that trace_cb() resolves names in:
+ * list the tables for @h into ctx->list, register each one and fetch its
+ * sets (with elements), chains and rules through a scratch netlink context.
+ *
+ * Returns 0 on success, -1 as soon as a netlink request fails; whatever was
+ * cached up to that point is left in place.
+ */
+static int create_cache(struct netlink_ctx *ctx, struct handle *h, struct location *loc)
+{
+	struct netlink_ctx tmpctx;
+	struct handle filter;
+	struct table *t, *nt;
+	struct chain *c, *nc;
+	struct rule *r, *rc;
+	struct set *s, *ns;
+	LIST_HEAD(msgs);
+
+	memset(&tmpctx, 0, sizeof(tmpctx));
+	init_list_head(&msgs);
+	tmpctx.msgs = &msgs;
+
+	if (netlink_list_tables(ctx, h, loc) < 0)
+		return -1;
+
+	list_for_each_entry_safe(t, nt, &ctx->list, list) {
+		table_add_hash(t);
+
+		/* zero every field not named here: no indeterminate values */
+		filter = (struct handle){
+			.family	= t->handle.family,
+			.table	= t->handle.table,
+		};
+
+		init_list_head(&tmpctx.list);
+		if (netlink_list_sets(&tmpctx, &filter, loc) < 0)
+			return -1;
+		list_for_each_entry_safe(s, ns, &tmpctx.list, list) {
+			if (netlink_get_setelems(&tmpctx, &s->handle, loc, s) < 0)
+				return -1;
+			list_move_tail(&s->list, &t->sets);
+			set_add_hash(s, t);
+		}
+
+		init_list_head(&tmpctx.list);
+		if (netlink_list_chains(&tmpctx, &filter, loc) < 0)
+			return -1;
+		list_for_each_entry_safe(c, nc, &tmpctx.list, list)
+			chain_add_hash(c, t);
+
+		init_list_head(&tmpctx.list);
+		if (netlink_list_table(&tmpctx, &filter, loc) < 0)
+			return -1;
+		list_for_each_entry_safe(r, rc, &tmpctx.list, list) {
+			c = chain_lookup(t, &r->handle);
+			if (c == NULL) {
+				c = chain_alloc(r->handle.chain);
+				chain_add_hash(c, t);
+			}
+			list_move_tail(&r->list, &c->rules);
+		}
+		init_list_head(&tmpctx.list);
+	}
+	return 0;
+}
+
+/*
+ * nflog callback: decode the "TRACE: " prefix of a message, resolve it in the
+ * cache and print one trace step.  Undecodable messages are skipped.
+ */
+static int trace_cb(struct nflog_g_handle *gh, struct nfgenmsg *nfmsg,
+			  struct nflog_data *nfa, void *data)
+{
+	static const char *const policies[NF_MAX_VERDICT] = {
+		[NF_DROP] = "DROP",
+		[NF_ACCEPT] = "ACCEPT",
+		[NF_QUEUE] = "QUEUE",
+	};
+	struct trace_prefix td = {};
+	struct handle h = { .family = NFPROTO_IPV4 };
+	const char *raw = nflog_get_prefix(nfa);
+	char *prefix = raw ? strdup(raw) : NULL;
+	uint32_t mark = nflog_get_nfmark(nfa);
+	const struct stmt *stmt;
+	struct table *table;
+	struct chain *chain;
+	struct rule *rule;
+	int num;
+
+	if (prefix == NULL || parse_trace_prefix(prefix, &td) != 0)
+		goto out;
+	/* table or chain may be missing from the cache: skip, do not crash */
+	h.table = td.table;
+	h.chain = td.chain;
+	table = table_lookup(&h);
+	chain = table ? chain_lookup(table, &h) : NULL;
+	if (chain == NULL)
+		goto out;
+
+	if (strcmp(td.action, "policy") == 0) {
+		printf("\t%s %s NFMARK=0x%x\n", td.table, td.chain, mark);
+		printf("\t\t%s\n", (chain->policy < NF_MAX_VERDICT &&
+		       policies[chain->policy]) ? policies[chain->policy] : "UNKNOWN");
+	} else if (strcmp(td.action, "return") == 0) {
+		printf("\t%s %s NFMARK=0x%x\n", td.table, td.chain, mark);
+		printf("\t\t => RETURN\n");
+	} else if (strcmp(td.action, "rule") == 0) {
+		/* NUM is the 1-based position of the rule in its chain */
+		num = atoi(td.num);
+		list_for_each_entry(rule, &chain->rules, list)
+			if (--num == 0)
+				break;
+		if (&rule->list == &chain->rules)
+			goto out;	/* ran off the end: no such rule */
+		/* meta nftrace statement found: presumably where the packet header belongs */
+		list_for_each_entry(stmt, &rule->stmts, list) {
+			if (stmt->ops->type == STMT_META &&
+			    stmt->meta.key == NFT_META_NFTRACE) {
+				printf("\n%s\n", "a->b");
+				break;
+			}
+		}
+		printf("\t%s %s (#%i) NFMARK=0x%x\n", td.table, td.chain, atoi(td.num), mark);
+		printf("\t\t");
+		rule_print(rule);
+		printf("\n");
+	}
+out:
+	free(prefix);
+	return 0;
+}
+/*
+ * "nft trace": build the ruleset cache, subscribe to nflog group 0 for IPv4
+ * and feed every received message to trace_cb() until the socket is shut
+ * down or fails.  Returns 0 on orderly shutdown, -1 on any error.
+ */
+static int do_command_trace(struct netlink_ctx *ctx, struct cmd *cmd)
+{
+	static char buf[0xffff + 4096];	/* 0xffff copy range + header room */
+	struct handle x = { .family = NFPROTO_IPV4 };
+	struct nflog_g_handle *qh = NULL;
+	struct nflog_handle *h;
+	int rv = -1;
+
+	if (create_cache(ctx, &x, &cmd->location) < 0)
+		return -1;
+	h = nflog_open();
+	if (!h) {
+		fprintf(stderr, "error during nflog_open()\n");
+		return -1;
+	}
+	if (nflog_unbind_pf(h, AF_INET) < 0) {
+		fprintf(stderr, "error nflog_unbind_pf()\n");
+		goto out;
+	}
+	if (nflog_bind_pf(h, AF_INET) < 0) {
+		fprintf(stderr, "error during nflog_bind_pf()\n");
+		goto out;
+	}
+	qh = nflog_bind_group(h, 0);
+	if (!qh) {
+		fprintf(stderr, "no handle for grup 0\n");
+		goto out;
+	}
+	if (nflog_set_mode(qh, NFULNL_COPY_PACKET, 0xffff) < 0) {
+		fprintf(stderr, "can't set packet copy mode\n");
+		goto out;
+	}
+	nflog_callback_register(qh, &trace_cb, ctx);
+
+	while ((rv = recv(nflog_fd(h), buf, sizeof(buf), 0)) > 0)
+		nflog_handle_packet(h, buf, rv);
+out:
+	if (qh)
+		nflog_unbind_group(qh);
+	nflog_close(h);
+	return rv < 0 ? -1 : 0;
+}
+
 static int do_command_monitor(struct netlink_ctx *ctx, struct cmd *cmd)
 {
 	struct table *t, *nt;
@@ -1029,6 +1290,8 @@ int do_command(struct netlink_ctx *ctx, struct cmd *cmd)
 		return do_command_monitor(ctx, cmd);
 	case CMD_DESCRIBE:
 		return do_command_describe(ctx, cmd);
+	case CMD_TRACE:
+		return do_command_trace(ctx, cmd);
 	default:
 		BUG("invalid command object type %u\n", cmd->obj);
 	}
diff --git a/src/scanner.l b/src/scanner.l
index 73c4f8b..4f8ace4 100644
--- a/src/scanner.l
+++ b/src/scanner.l
@@ -263,6 +263,7 @@ addrstring	({macaddr}|{ip4addr}|{ip6addr})
 "rename"		{ return RENAME; }
 "export"		{ return EXPORT; }
 "monitor"		{ return MONITOR; }
+"trace"			{ return TRACE; }
 
 "position"		{ return POSITION; }
 "comment"		{ return COMMENT; }

[Index of Archives]     [Netfilter Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux