On Mon, Feb 17, 2014 at 12:15:19PM +0100, Florian Westphal wrote: > Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> wrote: > > > > I think we should always do the module autoloading for nf-nat and > > > > nf-nat-ipvX modules depending on nf_ct_l3num(ct), then return EAGAIN > > > > to give another retry. Now, this needs to happen in any case, not only > > > > when calling ctnetlink_parse_nat_setup(). > > > > > > Not sure what you mean. If we enter nf_nat_setup_info without ctnetlink > > > involvement the nf-nat protocol should already be there (else, how can > > > we end up in nf_nat_setup_info? NAT/MASQUERADE depends on nf-nat). > > > > > > What use-case did you have in mind? (or, to put it differently, where > > > do you think the module probing logic should be)? > > > > If __nf_nat_l3proto_find returns NULL before trying to attach the null > > binding, I think you should call the routine to autoload the modules > > before returning EAGAIN. > > > > proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); > > if (proto == NULL) { > > ... release locks > > request_module(...); > > ... acquire locks again > > return -EAGAIN; > > } > > The patch alters ctnetlink to call ctnetlink_parse_nat_setup even when > NAT attr == NULL. > > nfnetlink_attach_null_binding() returns EAGAIN; this return value is > propagated back to ctnetlink_parse_nat_setup. > > That will then request_module(), nfnetlink will replay the message. > > running > conntrack -I -p tcp -s 1.1.1.1 -d 2.2.2.2 --timeout 100 --state \ > ESTABLISHED --sport 10 --dport 20 > on newly booted machine works, lsmod pre/post > shows: > +nf_conntrack_ipv4 14808 1 > +nf_defrag_ipv4 12702 1 nf_conntrack_ipv4 > +nf_nat_ipv4 13199 0 > +nf_nat 20926 1 nf_nat_ipv4 > > Which is the desired behaviour afaiu. Right, your patch looks good. > [ If you think calling ctnetlink_parse_nat_setup with NULL attr > is abuse, please let me know and I will try to come up with something > different ] I think we can simplify this, the nfnetlink_parse_nat_setup() hook function is always called under rcu_read_lock. See patch attached.
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bb322d0..b9f0e03 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1310,27 +1310,22 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) } static int -ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) +ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_NAT_NEEDED int ret; - if (cda[CTA_NAT_DST]) { - ret = ctnetlink_parse_nat_setup(ct, - NF_NAT_MANIP_DST, - cda[CTA_NAT_DST]); - if (ret < 0) - return ret; - } - if (cda[CTA_NAT_SRC]) { - ret = ctnetlink_parse_nat_setup(ct, - NF_NAT_MANIP_SRC, - cda[CTA_NAT_SRC]); - if (ret < 0) - return ret; - } - return 0; + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, + cda[CTA_NAT_DST]); + if (ret < 0) + return ret; + + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC, + cda[CTA_NAT_SRC]); + return ret; #else + if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) + return 0; return -EOPNOTSUPP; #endif } @@ -1659,11 +1654,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = ctnetlink_change_nat(ct, cda); - if (err < 0) - goto err2; - } + err = ctnetlink_setup_nat(ct, cda); + if (err < 0) + goto err2; nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index d3f5cd6..52ca952 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -432,15 +432,15 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); -unsigned int -nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +static unsigned int +__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip) { /* Force range to this IP; let proto decide mapping for * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). * Use reply in case it's already been mangled (eg local packet). */ union nf_inet_addr ip = - (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + (manip == NF_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); struct nf_nat_range range = { @@ -448,7 +448,13 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) .min_addr = ip, .max_addr = ip, }; - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); + return nf_nat_setup_info(ct, &range, manip); +} + +unsigned int +nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum)); } EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); @@ -702,9 +708,9 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { static int nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range *range) + const struct nf_conn *ct, struct nf_nat_range *range, + const struct nf_nat_l3proto *l3proto) { - const struct nf_nat_l3proto *l3proto; struct nlattr *tb[CTA_NAT_MAX+1]; int err; @@ -714,38 +720,46 @@ nfnetlink_parse_nat(const struct nlattr *nat, if (err < 0) return err; - rcu_read_lock(); - l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); - if (l3proto == NULL) { - err = -EAGAIN; - goto out; - } err = l3proto->nlattr_to_range(tb, range); if (err < 0) - goto out; + return err; if (!tb[CTA_NAT_PROTO]) - goto out; + return 0; - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); -out: - rcu_read_unlock(); - return err; + return nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); } +/* This function is called under rcu_read_lock() */ static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) { struct nf_nat_range range; + const struct nf_nat_l3proto *l3proto; int err; - err = nfnetlink_parse_nat(attr, ct, &range); + /* Should not happen, restricted to creating new conntracks + * via ctnetlink. + */ + if (WARN_ON_ONCE(nf_nat_initialized(ct, manip))) + return -EEXIST; + + /* Make sure that L3 NAT is there by when we call nf_nat_setup_info to + * attach the null binding, otherwise this may oops. + */ + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + if (l3proto == NULL) + return -EAGAIN; + + /* No NAT information has been passed, allocate the null-binding */ + if (attr == NULL) + return __nf_nat_alloc_null_binding(ct, manip); + + err = nfnetlink_parse_nat(attr, ct, &range, l3proto); if (err < 0) return err; - if (nf_nat_initialized(ct, manip)) - return -EEXIST; return nf_nat_setup_info(ct, &range, manip); } -- 1.7.10.4