Hi Jarno, Thanks for working on this. Mostly just a few style things around #ifdefs below. On 9 March 2016 at 15:10, Jarno Rajahalme <jarno@xxxxxxx> wrote: > Extend OVS conntrack interface to cover NAT. New nested > OVS_CT_ATTR_NAT attribute may be used to include NAT with a CT action. > A bare OVS_CT_ATTR_NAT only mangles existing and expected connections. > If OVS_NAT_ATTR_SRC or OVS_NAT_ATTR_DST is included within the nested > attributes, new (non-committed/non-confirmed) connections are mangled > according to the rest of the nested attributes. > > The corresponding OVS userspace patch series includes test cases (in > tests/system-traffic.at) that also serve as example uses. > > This work extends on a branch by Thomas Graf at > https://github.com/tgraf/ovs/tree/nat. Thomas, I guess there was not signoff in these patches so Jarno does not have your signoff in this patch. > Signed-off-by: Jarno Rajahalme <jarno@xxxxxxx> > --- > v9: Fixed module dependencies. > > include/uapi/linux/openvswitch.h | 49 ++++ > net/openvswitch/Kconfig | 3 +- > net/openvswitch/conntrack.c | 523 +++++++++++++++++++++++++++++++++++++-- > net/openvswitch/conntrack.h | 3 +- > 4 files changed, 551 insertions(+), 27 deletions(-) <snip> > diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig > index cd5fd9d..23471a4 100644 > --- a/net/openvswitch/Kconfig > +++ b/net/openvswitch/Kconfig > @@ -6,7 +6,8 @@ config OPENVSWITCH > tristate "Open vSwitch" > depends on INET > depends on !NF_CONNTRACK || \ > - (NF_CONNTRACK && (!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6)) > + (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ > + (!NF_NAT || NF_NAT))) Whitespace. > select LIBCRC32C > select MPLS > select NET_MPLS_GSO > diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c > index 5711f80..6455237 100644 > --- a/net/openvswitch/conntrack.c > +++ b/net/openvswitch/conntrack.c <snip> > struct ovs_ct_len_tbl { > - size_t maxlen; > - size_t minlen; > + int maxlen; > + int minlen; > }; Are these changed for a specific reason, or just to use INT_MAX rather than SIZE_MAX in ovs_ct_len_tbl? > /* Metadata mark for masked write to conntrack mark */ > @@ -42,15 +52,29 @@ struct md_labels { > struct ovs_key_ct_labels mask; > }; > > +#ifdef CONFIG_NF_NAT_NEEDED > +enum ovs_ct_nat { > + OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */ > + OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */ > + OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */ > +}; > +#endif Here... > /* Conntrack action context for execution. */ > struct ovs_conntrack_info { > struct nf_conntrack_helper *helper; > struct nf_conntrack_zone zone; > struct nf_conn *ct; > u8 commit : 1; > +#ifdef CONFIG_NF_NAT_NEEDED > + u8 nat : 3; /* enum ovs_ct_nat */ > +#endif and here.. I wonder if we can trim more of these #ifdefs, for readability and more compiler coverage if the feature is disabled. > u16 family; > struct md_mark mark; > struct md_labels labels; > +#ifdef CONFIG_NF_NAT_NEEDED > + struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */ > +#endif > }; (probably not this one, though) > static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); > @@ -137,12 +161,15 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, > ovs_ct_get_labels(ct, &key->ct.labels); > } > > -/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has > - * previously sent the packet to conntrack via the ct action. > +/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has > + * previously sent the packet to conntrack via the ct action. If > + * 'keep_nat_flags' is true, the existing NAT flags retained, else they are > + * initialized from the connection status. > */ > static void ovs_ct_update_key(const struct sk_buff *skb, > const struct ovs_conntrack_info *info, > - struct sw_flow_key *key, bool post_ct) > + struct sw_flow_key *key, bool post_ct, > + bool keep_nat_flags) > { > const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; > enum ip_conntrack_info ctinfo; > @@ -160,6 +187,16 @@ static void ovs_ct_update_key(const struct sk_buff *skb, > */ > if (ct->master) > state |= OVS_CS_F_RELATED; > +#ifdef CONFIG_NF_NAT_NEEDED > + if (keep_nat_flags) { > + state |= key->ct.state & OVS_CS_F_NAT_MASK; > + } else { > + if (ct->status & IPS_SRC_NAT) > + state |= OVS_CS_F_SRC_NAT; > + if (ct->status & IPS_DST_NAT) > + state |= OVS_CS_F_DST_NAT; > + } > +#endif This could be nested within something like if (CONFIG_NF_NAT_NEEDED) {...} or if (IS_ENABLED(...)) ... to always run the code through the compiler (which would likely compile out the whole check). <snip> > +#ifdef CONFIG_NF_NAT_NEEDED > + /* Adjust seqs after helper. This is needed due to some helpers (e.g., > + * FTP with NAT) adusting the TCP payload size when mangling IP > + * addresses and/or port numbers in the text-based control connection. > + */ > + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && > + !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) > + return NF_DROP; > +#endif > + return NF_ACCEPT; > } I suspect this would still compile and work correctly without the ifdef? <snip> > +/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ > +static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, > + const struct ovs_conntrack_info *info, > + struct sk_buff *skb, struct nf_conn *ct, > + enum ip_conntrack_info ctinfo) > +{ > + enum nf_nat_manip_type maniptype; > + int err; > + > + if (nf_ct_is_untracked(ct)) { > + /* A NAT action may only be performed on tracked packets. */ > + return NF_ACCEPT; > + } > + > + /* Add NAT extension if not confirmed yet. */ > + if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) > + return NF_ACCEPT; /* Can't NAT. */ > + > + /* Determine NAT type. > + * Check if the NAT type can be deduced from the tracked connection. > + * Make sure expected traffic is NATted only when committing. > + */ > + if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && > + ct->status & IPS_NAT_MASK && > + (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) { > + /* NAT an established or related connection like before. */ > + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) > + /* This is the REPLY direction for a connection > + * for which NAT was applied in the forward > + * direction. Do the reverse NAT. > + */ > + maniptype = ct->status & IPS_SRC_NAT > + ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; > + else > + maniptype = ct->status & IPS_SRC_NAT > + ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; > + } else if (info->nat & OVS_CT_SRC_NAT) { > + maniptype = NF_NAT_MANIP_SRC; > + } else if (info->nat & OVS_CT_DST_NAT) { > + maniptype = NF_NAT_MANIP_DST; > + } else { > + return NF_ACCEPT; /* Connection is not NATed. */ > + } > + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); > + > + /* Mark NAT done if successful and update the flow key. */ > + if (err == NF_ACCEPT) > + ovs_nat_update_key(key, skb, maniptype); > + > + return err; > +} > +#endif <snip> > - /* Call the helper only if we did nf_conntrack_in() above ('!cached') > - * for confirmed connections, but only when committing for unconfirmed > - * connections. > - */ > ct = nf_ct_get(skb, &ctinfo); > - if (ct && (nf_ct_is_confirmed(ct) ? !cached : info->commit) && > - ovs_ct_helper(skb, info->family) != NF_ACCEPT) { > - WARN_ONCE(1, "helper rejected packet"); > - return -EINVAL; > + if (ct) { > +#ifdef CONFIG_NF_NAT_NEEDED > + /* Packets starting a new connection must be NATted before the > + * helper, so that the helper knows about the NAT. We enforce > + * this by delaying both NAT and helper calls for unconfirmed > + * connections until the committing CT action. For later > + * packets NAT and Helper may be called in either order. > + * > + * NAT will be done only if the CT action has NAT, and only > + * once per packet (per zone), as guarded by the NAT bits in > + * the key->ct.state. > + */ > + if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) && > + (nf_ct_is_confirmed(ct) || info->commit) && > + ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { > + return -EINVAL; > + } > +#endif Again, I think the #ifdefs are probably unnecessary. Currently ovs_ct_nat() is conditionally compiled out, but we could provide a stub version for the !CONFIG_NF_NAT_NEEDED case instead, placed immediately below the actual implementation further up: ... #else /* !CONFIG_NF_NAT_NEEDED */ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { return NF_ACCEPT; } #endif Thanks, Joe -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html