SKBs are assigned a conntrack entry before being passed to any NFQUEUEs, and if no entry is found then a new one is created. This behavior causes problems for some traffic patterns. For example, if two UDP packets to/from the same host (using the same ports) arrive at the "same" time, both are assigned a new conntrack entry. After the first packet has traversed all chains, its conntrack entry will be inserted into the global table. The second packet will then be dropped during the insertion step, as an entry for the same flow already exists. One type of application that frequently generates this traffic pattern is DNS resolvers. This commit introduces a new function that checks, and potentially replaces, the conntrack entry for any additional "new" SKBs mapping to an existing flow. While this is not a perfect solution (there are still situations where to-be-dropped SKBs can slip through), the situation is improved considerably. On the routers I have used for testing, packets belonging to the same UDP flow are let through (when generating the traffic pattern described above). Without the change in this commit, all packets except the first one were dropped. With the change in this commit, a user can implement "perfect" solutions in user-space. An application can, for example, keep track of seen UDP flows, and then only release packets belonging to one flow when the entry has been created. Without the change, an SKB is stuck with the original conntrack entry.
Signed-off-by: Kristian Evensen <kristian.evensen@xxxxxxxxx> --- net/netfilter/nfnetlink_queue.c | 68 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c97966298..150c11ff4 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -43,6 +43,9 @@ #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #endif #define NFQNL_QMAX_DEFAULT 1024 @@ -1046,6 +1049,53 @@ static int nfq_id_after(unsigned int id, unsigned int max) return (int)(id - max) > 0; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +static void nfqnl_update_ct(struct net *net, struct sk_buff *skb) +{ + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = NULL; + unsigned int dataoff; + u16 l3num; + u8 l4num; + + ct = nf_ct_get(skb, &ctinfo); + l3num = nf_ct_l3num(ct); + l3proto = nf_ct_l3proto_find_get(l3num); + + if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, + &l4num) <= 0) { + return; + } + + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, + l4num, net, &tuple, l3proto, l4proto)) { + return; + } + +#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) + h = nf_conntrack_find_get(net, &ct->zone, &tuple); +#else + h = nf_conntrack_find_get(net, NULL, &tuple); +#endif + + if (h) { + pr_debug("%s: tuple %u %pI4:%hu -> %pI4:%hu\n", __func__, + tuple.dst.protonum, &tuple.src.u3.ip, + ntohs(tuple.src.u.all), &tuple.dst.u3.ip, + ntohs(tuple.dst.u.all)); + nf_ct_put(ct); + ct = nf_ct_tuplehash_to_ctrack(h); + nf_ct_set(skb, ct, IP_CT_NEW); + } +} +#endif + static int 
nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1060,6 +1110,7 @@ static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); struct nfnl_queue_net *q = nfnl_queue_pernet(net); + enum ip_conntrack_info ctinfo; queue = verdict_instance_lookup(q, queue_num, NETLINK_CB(skb).portid); @@ -1090,6 +1141,16 @@ static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, list_for_each_entry_safe(entry, tmp, &batch_list, list) { if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_ct_get(entry->skb, &ctinfo); + + if (ctinfo == IP_CT_NEW && verdict != NF_STOLEN && + verdict != NF_DROP) { + nfqnl_update_ct(net, entry->skb); + } +#endif + nf_reinject(entry, verdict); } return 0; @@ -1213,6 +1274,13 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_ct_get(entry->skb, &ctinfo); + + if (ctinfo == IP_CT_NEW && verdict != NF_STOLEN && verdict != NF_DROP) + nfqnl_update_ct(net, entry->skb); +#endif + nf_reinject(entry, verdict); return 0; } -- 2.14.1 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html