Add support for masquerading into a smaller subset of ports, defined by the PSID (Port Set ID) values from RFC 7597 Section 5.1. This is part of the support for MAP-E and Lightweight 4over6, which allow multiple devices to share an IPv4 address by splitting the L4 port / ID into ranges. Co-developed-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx> Co-developed-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Blair Steven <blair.steven@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Cole Dishington <Cole.Dishington@xxxxxxxxxxxxxxxxxxx> --- include/net/netfilter/nf_conntrack.h | 2 + .../netfilter/nf_conntrack_tuple_common.h | 5 + include/uapi/linux/netfilter/nf_nat.h | 3 +- net/netfilter/nf_nat_core.c | 101 ++++++++++++++++-- net/netfilter/nf_nat_ftp.c | 23 ++-- net/netfilter/nf_nat_helper.c | 15 ++- 6 files changed, 120 insertions(+), 29 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 439379ca9ffa..d63d38aa7188 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -92,6 +92,8 @@ struct nf_conn { /* If we were expected by an expectation, this will be it */ struct nf_conn *master; + struct nf_nat_range2 *range; + #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h index 64390fac6f7e..36d16d47c2b0 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h @@ -39,6 +39,11 @@ union nf_conntrack_man_proto { struct { __be16 key; /* GRE key is 32bit, PPtP only uses 16bit */ } gre; + struct { + unsigned char psid_length; + unsigned char offset; + __be16 psid; + } psid; }; #define CTINFO2DIR(ctinfo) ((ctinfo) >= 
IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL) diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h index a64586e77b24..660e53ffdb57 100644 --- a/include/uapi/linux/netfilter/nf_nat.h +++ b/include/uapi/linux/netfilter/nf_nat.h @@ -12,6 +12,7 @@ #define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4) #define NF_NAT_RANGE_PROTO_OFFSET (1 << 5) #define NF_NAT_RANGE_NETMAP (1 << 6) +#define NF_NAT_RANGE_PSID (1 << 7) #define NF_NAT_RANGE_PROTO_RANDOM_ALL \ (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) @@ -20,7 +21,7 @@ (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \ NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \ NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \ - NF_NAT_RANGE_NETMAP) + NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID) struct nf_nat_ipv4_range { unsigned int flags; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index b7c3c902290f..7730ce4ca9a9 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -232,13 +232,33 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t, static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) + const union nf_conntrack_man_proto *max, + bool is_psid) { __be16 port; + int m = 0; + u16 offset_mask = 0; + u16 psid_mask = 0; + + /* In this case we are in PSID mode and the rules are all different */ + if (is_psid) { + /* m = number of bits in each valid range */ + m = 16 - min->psid.psid_length - min->psid.offset; + offset_mask = ((1 << min->psid.offset) - 1) << + (16 - min->psid.offset); + psid_mask = ((1 << min->psid.psid_length) - 1) << m; + } + switch (tuple->dst.protonum) { case IPPROTO_ICMP: case IPPROTO_ICMPV6: + if (is_psid) { + return ((ntohs(tuple->src.u.icmp.id) & offset_mask) != + 0) && + ((ntohs(tuple->src.u.icmp.id) & psid_mask) == + 
min->psid.psid); + } return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); case IPPROTO_GRE: /* all fall though */ @@ -252,6 +272,11 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, else port = tuple->dst.u.all; + if (is_psid) { + return ((ntohs(port) & offset_mask) != 0) && + (((ntohs(port) & psid_mask) >> m) == + min->psid.psid); + } return ntohs(port) >= ntohs(min->all) && ntohs(port) <= ntohs(max->all); default: @@ -274,9 +299,9 @@ static int in_range(const struct nf_conntrack_tuple *tuple, if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) return 1; - return l4proto_in_range(tuple, NF_NAT_MANIP_SRC, - &range->min_proto, &range->max_proto); + &range->min_proto, &range->max_proto, + range->flags & NF_NAT_RANGE_PSID); } static inline int @@ -397,10 +422,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone, * * Per-protocol part of tuple is initialized to the incoming packet. */ -static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) +void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) { unsigned int range_size, min, max, i, attempts; __be16 *keyptr; @@ -457,6 +482,50 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, return; } + if (range->flags & NF_NAT_RANGE_PSID) { + /* Find the non-PSID parts of the port. + * To do this we look for an unused port that is + * comprised of [t_chunk|PSID|b_chunk]. The size of + * these pieces is defined by the psid_length and + * offset. 
+ */ + int m = 16 - range->min_proto.psid.psid_length - + range->min_proto.psid.offset; + int available; + int range_count = ((1 << range->min_proto.psid.offset) - 1); + + /* Calculate the size of the bottom block */ + range_size = (1 << m); + + /* Calculate the total IDs to check */ + available = range_size * range_count; + if (!available) + available = range_size; + + off = ntohs(*keyptr); + for (i = 0;; ++off) { + int b_chunk = off % range_size; + int t_chunk = 0; + + /* Move up to avoid the all-zeroes reserved chunk + * (if there is one). + */ + if (range->min_proto.psid.offset > 0) { + t_chunk = (off >> m) % range_count; + ++t_chunk; + t_chunk <<= (m + + range->min_proto.psid.psid_length); + } + + *keyptr = htons(t_chunk | + (range->min_proto.psid.psid << m) + | b_chunk); + + if (++i >= available || !nf_nat_used_tuple(tuple, ct)) + return; + } + } + /* If no range specified... */ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ @@ -566,11 +635,18 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, /* Only bother mapping if it's not already in range and unique */ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { - if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { + /* Now that the PSID mode is present we always need to check + * to see if the source ports are in range. 
+ */ + if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED || + (range->flags & NF_NAT_RANGE_PSID && + !in_range(orig_tuple, range))) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, - &range->min_proto, - &range->max_proto) && + &range->min_proto, + &range->max_proto, + range->flags & + NF_NAT_RANGE_PSID) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) return; @@ -623,6 +699,11 @@ nf_nat_setup_info(struct nf_conn *ct, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); + if (range) { + if (!ct->range) + ct->range = kmalloc(sizeof(*ct->range), 0); + memcpy(ct->range, range, sizeof(*ct->range)); + } if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { struct nf_conntrack_tuple reply; diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index aace6768a64e..006b7e1836ff 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -17,6 +17,10 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_ftp.h> +void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct); #define NAT_HELPER_NAME "ftp" @@ -86,19 +90,12 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, * this one. */ exp->expectfn = nf_nat_follow_master; - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } + /* Find a port that matches the MASQ rule. */ + nf_nat_l4proto_unique_tuple(&exp->tuple, ct->range, + dir ? 
NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST, + ct); + port = ntohs(exp->tuple.dst.u.tcp.port); + nf_ct_expect_related(exp, 0); if (port == 0) { nf_ct_helper_log(skb, ct, "all ports in use"); diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index a263505455fc..090153475d4d 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -184,11 +184,16 @@ void nf_nat_follow_master(struct nf_conn *ct, /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); - /* Change src to where master sends to */ - range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_addr = range.max_addr - = ct->master->tuplehash[!exp->dir].tuple.dst.u3; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + if (exp->master && exp->master->range && !exp->dir) { + range = *exp->master->range; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + } else { + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + } /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); -- 2.31.1