Re: [PATCH nf-next 1/6] netfilter: nf_tables: extend tracing infrastructure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 24, 2015 at 11:02:06AM +0100, Florian Westphal wrote:
> nft monitor mode can then decode and display this trace data.
> 
> Parts of LL/Network/Transport headers are provided as separate
> attributes.
> 
> Otherwise, printing IP address data becomes virtually impossible
> for userspace since in the case of the netdev family we really don't
> want userspace to have to know all the possible link layer types
> and/or sizes just to display/print an ip address.
> 
> We also don't want userspace to have to follow ipv6 header chains
> to get the s/dport info, the kernel already did this work so just
> follow suit.
> 
> Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
> ---
>  include/net/netfilter/nf_tables.h        |   6 +
>  include/uapi/linux/netfilter/nf_tables.h |  32 ++++++
>  net/netfilter/nf_tables_api.c            | 190 +++++++++++++++++++++++++++++++
>  net/netfilter/nf_tables_core.c           |  28 +++--
>  net/netfilter/nft_meta.c                 |   3 +
>  5 files changed, 252 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
> index 4bd7508..5131ad4 100644
> --- a/include/net/netfilter/nf_tables.h
> +++ b/include/net/netfilter/nf_tables.h
> @@ -890,6 +890,12 @@ void nft_unregister_chain_type(const struct nf_chain_type *);
>  int nft_register_expr(struct nft_expr_type *);
>  void nft_unregister_expr(struct nft_expr_type *);
>  
> +void nf_tables_trace_notify(const struct nft_pktinfo *pkt,
> +			    const struct nft_chain *chain,
> +			    const struct nft_rule *rule,
> +			    u32 verdict,
> +			    enum nft_trace_types type);
> +
>  #define nft_dereference(p)					\
>  	nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
>  
> diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> index d8c8a7c..88bcd00 100644
> --- a/include/uapi/linux/netfilter/nf_tables.h
> +++ b/include/uapi/linux/netfilter/nf_tables.h
> @@ -83,6 +83,7 @@ enum nft_verdicts {
>   * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
>   * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
>   * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
> + * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes)
>   */
>  enum nf_tables_msg_types {
>  	NFT_MSG_NEWTABLE,
> @@ -102,6 +103,7 @@ enum nf_tables_msg_types {
>  	NFT_MSG_DELSETELEM,
>  	NFT_MSG_NEWGEN,
>  	NFT_MSG_GETGEN,
> +	NFT_MSG_TRACE,
>  	NFT_MSG_MAX,
>  };
>  
> @@ -970,4 +972,34 @@ enum nft_gen_attributes {
>  };
>  #define NFTA_GEN_MAX		(__NFTA_GEN_MAX - 1)
>  
> +enum nft_trace_attibutes {
> +	NFTA_TRACE_UNSPEC,
> +	NFTA_TRACE_CHAIN,
> +	NFTA_TRACE_DEV_TYPE,
> +	NFTA_TRACE_ID,
> +	NFTA_TRACE_IIF,
> +	NFTA_TRACE_OIF,
> +	NFTA_TRACE_LL_HEADER,
> +	NFTA_TRACE_MARK,
> +	NFTA_TRACE_NETWORK_HEADER,
> +	NFTA_TRACE_TABLE,
> +	NFTA_TRACE_TRANSPORT_HEADER,
> +	NFTA_TRACE_TRANSPORT_PROTO,
> +	NFTA_TRACE_TYPE,
> +	NFTA_TRACE_RULE_HANDLE,
> +	NFTA_TRACE_VERDICT,
> +	NFTA_TRACE_VLAN_TAG,
> +	__NFTA_TRACE_MAX
> +};
> +#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1)
> +
> +enum nft_trace_types {
> +	NFT_TRACETYPE_UNSPEC,
> +	NFT_TRACETYPE_PACKET,
> +	NFT_TRACETYPE_POLICY,
> +	NFT_TRACETYPE_RETURN,
> +	NFT_TRACETYPE_RULE,
> +	__NFT_TRACETYPE_MAX
> +};
> +#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1)
>  #endif /* _LINUX_NF_TABLES_H */
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 93cc473..25d8168 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -9,6 +9,8 @@
>   */
>  
>  #include <linux/module.h>
> +#include <linux/hash.h>
> +#include <linux/if_vlan.h>
>  #include <linux/init.h>
>  #include <linux/list.h>
>  #include <linux/skbuff.h>
> @@ -21,6 +23,10 @@
>  #include <net/net_namespace.h>
>  #include <net/sock.h>
>  
> +#define NFT_TRACETYPE_LL_HSIZE		20
> +#define NFT_TRACETYPE_NETWORK_HSIZE	32
> +#define NFT_TRACETYPE_TRANSPORT_HSIZE	 4
> +
>  static LIST_HEAD(nf_tables_expressions);
>  
>  /**
> @@ -468,6 +474,84 @@ nla_put_failure:
>  	return -1;
>  }
>  
> +static bool trace_notify_put_data(struct sk_buff *nlskb, u16 type,
> +				  const struct sk_buff *skb,
> +				  int off, unsigned int plen)

Minor nitpick: you could probably rename this to follow the _fill_*_info
pattern, for consistency with the other nf_tables netlink code.

> +{
> +	struct nlattr *nla;
> +
> +	if (skb_tailroom(nlskb) < nla_total_size(plen))
> +		return false;
> +
> +	nla = (struct nlattr *)skb_put(nlskb, nla_total_size(plen));
> +	nla->nla_type = type;
> +	nla->nla_len = nla_attr_size(plen);
> +
> +	if (skb_copy_bits(skb, off, nla_data(nla), plen))
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool
> +trace_notify_put_packet(struct sk_buff *nlskb, const struct nft_pktinfo *pkt)
> +{
> +	const struct sk_buff *skb = pkt->skb;
> +	unsigned int plen = min_t(unsigned int,
> +				  pkt->xt.thoff - skb_network_offset(skb),
> +				  NFT_TRACETYPE_NETWORK_HSIZE);
> +	int mac_off;
> +
> +	if (plen >= 20u && /* minimum iphdr size */
> +	    !trace_notify_put_data(nlskb, NFTA_TRACE_NETWORK_HEADER,
> +				   skb, skb_network_offset(skb), plen))
> +		return false;
> +
> +	if (nla_put_u8(nlskb, NFTA_TRACE_TRANSPORT_PROTO, pkt->tprot))
> +		return false;
> +
> +	plen = min_t(unsigned int, skb->len - pkt->xt.thoff,
> +		     NFT_TRACETYPE_TRANSPORT_HSIZE);
> +
> +	if (plen >= sizeof(u32) &&
> +	    !trace_notify_put_data(nlskb, NFTA_TRACE_TRANSPORT_HEADER,
> +				   skb, pkt->xt.thoff, plen))
> +		return false;
> +
> +	switch (pkt->pf) {
> +	case NFPROTO_ARP: /* fallthrough */
> +	case NFPROTO_BRIDGE:
> +		break;
> +	case NFPROTO_NETDEV:
> +		if (WARN_ON_ONCE(!skb->dev))
> +			break;
> +		if (nla_put_be16(nlskb, NFTA_TRACE_DEV_TYPE,
> +				 htons(skb->dev->type)))
> +			return false;
> +		break;
> +	default:
> +		return true;
> +	}
> +
> +	if (skb_vlan_tag_get(skb) &&
> +	    !nla_put_be16(nlskb, NFTA_TRACE_VLAN_TAG,
> +			  htons(skb_vlan_tag_get(skb))))
> +		return false;
> +
> +	if (!skb_mac_header_was_set(skb))
> +		return true;
> +
> +	plen = min_t(unsigned int,
> +		     skb->data - skb_mac_header(skb), NFT_TRACETYPE_LL_HSIZE);
> +	mac_off = skb_mac_header(skb) - skb->data;
> +
> +	if (plen && !trace_notify_put_data(nlskb, NFTA_TRACE_LL_HEADER,
> +					   skb, mac_off, plen))
> +		return false;
> +
> +	return true;
> +}

Do you think we can place all this new netlink code in
net/netfilter/nf_tables_trace.c? That way we leave only our classifier
engine in the core file.

I like the attribute definition rename in nf_tables.h, but we can
probably move this code out of here.

>  static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
>  {
>  	struct sk_buff *skb;
> @@ -499,6 +583,112 @@ err:
>  	return err;
>  }
>  
> +void nf_tables_trace_notify(const struct nft_pktinfo *pkt,
> +			    const struct nft_chain *chain,
> +			    const struct nft_rule *rule,
> +			    u32 verdict,
> +			    enum nft_trace_types type)
> +{
> +	struct nfgenmsg *nfmsg;
> +	struct nlmsghdr *nlh;
> +	struct sk_buff *skb;
> +	unsigned int size;
> +	int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
> +
> +	if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTABLES))
> +		return;
> +
> +	/* Unlike other notifiers we need GFP_ATOMIC so use actual size
> +	 * needed instead of NLMSG_GOODSIZE.
> +	 */
> +	size = nlmsg_total_size(sizeof(struct nfgenmsg))
> +		+ nla_total_size(sizeof(__be32))	/* trace type */
> +		+ nla_total_size(NFT_TABLE_MAXNAMELEN)
> +		+ nla_total_size(NFT_CHAIN_MAXNAMELEN)
> +		+ nla_total_size(sizeof(u32))	/* iif */
> +		+ nla_total_size(sizeof(u32))	/* oif */
> +		+ nla_total_size(sizeof(u32))	/* id */
> +		+ nla_total_size(sizeof(u32))	/* mark */
> +		+ nla_total_size(sizeof(u32))	/* verdict */
> +		+ nla_total_size(sizeof(__be64)); /* rule handle */
> +
> +	switch (type) {
> +	case NFT_TRACETYPE_PACKET:
> +		size += nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE)
> +			+ nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE)
> +			+ nla_total_size(NFT_TRACETYPE_LL_HSIZE)
> +			+ nla_total_size(sizeof(__be16)) /* vlan tag */
> +			+ nla_total_size(sizeof(__be16)) /* device type */
> +			+ nla_total_size(sizeof(__u8));  /* transport prot */
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	skb = nlmsg_new(size, GFP_ATOMIC);
> +	if (!skb)
> +		return;
> +
> +	nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
> +	if (!nlh)
> +		goto nla_put_failure;
> +
> +	nfmsg = nlmsg_data(nlh);
> +	nfmsg->nfgen_family	= pkt->pf;
> +	nfmsg->version		= NFNETLINK_V0;
> +	nfmsg->res_id		= htons(pkt->net->nft.base_seq & 0xffff);
> +
> +	if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(type)))
> +		goto nla_put_failure;
> +
> +	if (nla_put_be32(skb, NFTA_TRACE_ID, htonl(hash32_ptr(pkt->skb))))
> +		goto nla_put_failure;
> +
> +	if (chain) {
> +		if (nla_put_string(skb, NFTA_TRACE_TABLE, chain->table->name))
> +			goto nla_put_failure;
> +		if (nla_put_string(skb, NFTA_TRACE_CHAIN, chain->name))
> +			goto nla_put_failure;
> +	}
> +
> +	if (rule && nla_put_be64(skb, NFTA_TRACE_RULE_HANDLE,
> +				 cpu_to_be64(rule->handle)))
> +		goto nla_put_failure;
> +
> +	if (pkt->in &&
> +	    nla_put_be32(skb, NFTA_TRACE_IIF, htonl(pkt->in->ifindex)))
> +		goto nla_put_failure;
> +	if (pkt->out &&
> +	    nla_put_be32(skb, NFTA_TRACE_OIF, htonl(pkt->out->ifindex)))
> +		goto nla_put_failure;
> +	if (pkt->skb->mark &&
> +	    nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
> +		goto nla_put_failure;
> +
> +	switch (type) {
> +	case NFT_TRACETYPE_POLICY:
> +	case NFT_TRACETYPE_RETURN:
> +	case NFT_TRACETYPE_RULE:
> +		if (nla_put_be32(skb, NFTA_TRACE_VERDICT, htonl(verdict)))
> +			goto nla_put_failure;
> +		break;
> +	case NFT_TRACETYPE_PACKET:
> +		if (!trace_notify_put_packet(skb, pkt))
> +			goto nla_put_failure;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	nlmsg_end(skb, nlh);
> +	nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTABLES, 0, GFP_ATOMIC);
> +	return;
> +
> + nla_put_failure:
> +	WARN_ON_ONCE(1);
> +	kfree_skb(skb);
> +}
> +
>  static int nf_tables_dump_tables(struct sk_buff *skb,
>  				 struct netlink_callback *cb)
>  {
> diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
> index f3695a4..29a6ca9 100644
> --- a/net/netfilter/nf_tables_core.c
> +++ b/net/netfilter/nf_tables_core.c
> @@ -56,10 +56,15 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt,
>  
>  static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
>  				    const struct nft_chain *chain,
> -				    int rulenum, enum nft_trace type)
> +				    const struct nft_rule *rule,
> +				    int rulenum,
> +				    u32 verdict,
> +				    enum nft_trace_types type)
>  {
> -	if (unlikely(pkt->skb->nf_trace))
> +	if (unlikely(pkt->skb->nf_trace)) {
> +		nf_tables_trace_notify(pkt, chain, rule, verdict, type);
>  		__nft_trace_packet(pkt, chain, rulenum, type);
> +	}
>  }
>  
>  static void nft_cmp_fast_eval(const struct nft_expr *expr,
> @@ -151,7 +156,8 @@ next_rule:
>  			regs.verdict.code = NFT_CONTINUE;
>  			continue;
>  		case NFT_CONTINUE:
> -			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
> +			nft_trace_packet(pkt, chain, rule, rulenum,
> +					 regs.verdict.code, NFT_TRACETYPE_RULE);
>  			continue;
>  		}
>  		break;
> @@ -161,7 +167,9 @@ next_rule:
>  	case NF_ACCEPT:
>  	case NF_DROP:
>  	case NF_QUEUE:
> -		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
> +		nft_trace_packet(pkt, chain, rule, rulenum,
> +				 regs.verdict.code & NF_VERDICT_MASK,
> +				 NFT_TRACETYPE_RULE);
>  		return regs.verdict.code;
>  	}
>  
> @@ -174,7 +182,8 @@ next_rule:
>  		stackptr++;
>  		/* fall through */
>  	case NFT_GOTO:
> -		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
> +		nft_trace_packet(pkt, chain, rule, rulenum,
> +				 regs.verdict.code, NFT_TRACETYPE_RULE);
>  
>  		chain = regs.verdict.chain;
>  		goto do_chain;
> @@ -182,7 +191,10 @@ next_rule:
>  		rulenum++;
>  		/* fall through */
>  	case NFT_RETURN:
> -		nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
> +		if (stackptr)

Why this new branch?

> +			nft_trace_packet(pkt, chain, rule, rulenum,
> +					 regs.verdict.code,
> +					 NFT_TRACETYPE_RETURN);
>  		break;
>  	default:
>  		WARN_ON(1);

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Netfilter Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux