This adds support for masquerading into a smaller subset of ports, defined by the PSID values from RFC 7597 Section 5.1. This is part of the support for MAP-E and Lightweight 4over6, which allows multiple devices to share an IPv4 address by splitting the L4 port/ID into ranges. Co-developed-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx> Co-developed-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Blair Steven <blair.steven@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Cole Dishington <Cole.Dishington@xxxxxxxxxxxxxxxxxxx> --- Notes: Thanks for your time reviewing. I have also submitted a patch to netfilter iptables for these changes. Comments: Selecting the ports for PSID needs to be in nf_nat_core since the PSID ranges are not a single range. For example, offset=1024, PSID=0, psid_length=8 generates the ranges 1024-1027, 2048-2051, ..., 63488-63491, ... (example taken from RFC 7597, Appendix B.2). This is why it is not enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init upper/lower boundaries. Changes in v2: - Moved cached range2 from struct nf_conn to nf_conn_nat. - Moved psid fields out of union nf_conntrack_man_proto. Now using range2 fields src, dst, and base to store psid parameters. - Re-added removed error check for nf_ct_expect_related(). - Added new version to masquerade iptables extension to use the range2 base field. 
include/net/netfilter/nf_nat.h | 1 + include/uapi/linux/netfilter/nf_nat.h | 3 +- net/netfilter/nf_nat_core.c | 69 +++++++++++++++++++++++---- net/netfilter/nf_nat_ftp.c | 29 ++++++----- net/netfilter/nf_nat_helper.c | 16 +++++-- net/netfilter/nf_nat_masquerade.c | 13 +++-- net/netfilter/xt_MASQUERADE.c | 44 +++++++++++++++-- 7 files changed, 140 insertions(+), 35 deletions(-) diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 987111ae5240..67cc033f76bb 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -32,6 +32,7 @@ struct nf_conn_nat { union nf_conntrack_nat_help help; #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) int masq_index; + struct nf_nat_range2 *range; #endif }; diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h index a64586e77b24..660e53ffdb57 100644 --- a/include/uapi/linux/netfilter/nf_nat.h +++ b/include/uapi/linux/netfilter/nf_nat.h @@ -12,6 +12,7 @@ #define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4) #define NF_NAT_RANGE_PROTO_OFFSET (1 << 5) #define NF_NAT_RANGE_NETMAP (1 << 6) +#define NF_NAT_RANGE_PSID (1 << 7) #define NF_NAT_RANGE_PROTO_RANDOM_ALL \ (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) @@ -20,7 +21,7 @@ (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \ NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \ NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \ - NF_NAT_RANGE_NETMAP) + NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID) struct nf_nat_ipv4_range { unsigned int flags; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 7de595ead06a..7307bb28ece2 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -195,13 +195,32 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t, static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, const union nf_conntrack_man_proto *min, - const union 
nf_conntrack_man_proto *max) + const union nf_conntrack_man_proto *max, + const union nf_conntrack_man_proto *base, + bool is_psid) { __be16 port; + u16 offset_mask = 0; + u16 psid_mask = 0; + u16 psid = 0; + + /* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */ + if (is_psid) { + u16 j = ntohs(max->all) - ntohs(min->all) + 1; + u16 a = (1 << 16) / ntohs(base->all); + + offset_mask = (a - 1) * ntohs(base->all); + psid_mask = ((ntohs(base->all) / j) << 1) - 1; + psid = ntohs(min->all) & psid_mask; + } switch (tuple->dst.protonum) { case IPPROTO_ICMP: case IPPROTO_ICMPV6: + if (is_psid) { + return ((ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) && + ((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid); + } return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); case IPPROTO_GRE: /* all fall though */ @@ -215,6 +234,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, else port = tuple->dst.u.all; + if (is_psid) { + return ((ntohs(port) & offset_mask) != 0) && + ((ntohs(port) & psid_mask) == psid); + } return ntohs(port) >= ntohs(min->all) && ntohs(port) <= ntohs(max->all); default: @@ -239,7 +262,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple, return 1; return l4proto_in_range(tuple, NF_NAT_MANIP_SRC, - &range->min_proto, &range->max_proto); + &range->min_proto, &range->max_proto, &range->base_proto, + range->flags & NF_NAT_RANGE_PSID); } static inline int @@ -360,10 +384,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone, * * Per-protocol part of tuple is initialized to the incoming packet. 
*/ -static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) +void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) { unsigned int range_size, min, max, i, attempts; __be16 *keyptr; @@ -420,6 +444,25 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, return; } + if (range->flags & NF_NAT_RANGE_PSID) { + /* PSID defines a group of port ranges, per PSID. PSID + * is already contained in min and max. + */ + unsigned int min_to_max, base; + + min = ntohs(range->min_proto.all); + max = ntohs(range->max_proto.all); + base = ntohs(range->base_proto.all); + min_to_max = max - min; + for (; max <= (1 << 16) - 1; min += base, max = min + min_to_max) { + for (off = 0; off <= min_to_max; off++) { + *keyptr = htons(min + off); + if (!nf_nat_used_tuple(tuple, ct)) + return; + } + } + } + /* If no range specified... */ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ @@ -529,11 +572,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, /* Only bother mapping if it's not already in range and unique */ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { - if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { + /* PSID mode is present always needs to check + * to see if the source ports are in range. 
+ */ + if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED || + (range->flags & NF_NAT_RANGE_PSID && + !in_range(orig_tuple, range))) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, - &range->min_proto, - &range->max_proto) && + &range->min_proto, + &range->max_proto, + &range->base_proto, + range->flags & + NF_NAT_RANGE_PSID) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) return; diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index aace6768a64e..f65163278db0 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -17,6 +17,10 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_ftp.h> +void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct); #define NAT_HELPER_NAME "ftp" @@ -72,8 +76,13 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, u_int16_t port; int dir = CTINFO2DIR(ctinfo); struct nf_conn *ct = exp->master; + struct nf_conn_nat *nat = nfct_nat(ct); char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN]; unsigned int buflen; + int ret; + + if (WARN_ON_ONCE(!nat)) + return NF_DROP; pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen); @@ -86,18 +95,14 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, * this one. */ exp->expectfn = nf_nat_follow_master; - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } + /* Find a port that matches the MASQ rule. */ + nf_nat_l4proto_unique_tuple(&exp->tuple, nat->range, + dir ? 
NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST, + ct); + ret = nf_ct_expect_related(exp, 0); + port = ntohs(exp->tuple.dst.u.tcp.port); + if (ret != 0 && ret != -EBUSY) { + port = 0; } if (port == 0) { diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index a263505455fc..2d105e4eb8f8 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -179,15 +179,23 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet); void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct nf_conn_nat *nat = NULL; struct nf_nat_range2 range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); - /* Change src to where master sends to */ - range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_addr = range.max_addr - = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + if (exp->master && !exp->dir) { + nat = nfct_nat(exp->master); + if (nat) + range = *nat->range; + } + if (!nat) { + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 8e8a65d46345..d83cd3d8ad3f 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -45,10 +45,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, return NF_DROP; } - nat = nf_ct_nat_ext_add(ct); - if (nat) - nat->masq_index = out->ifindex; - /* Transfer from original range. 
*/ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); @@ -57,6 +53,15 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, newrange.max_addr.ip = newsrc; newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; + newrange.base_proto = range->base_proto; + + nat = nf_ct_nat_ext_add(ct); + if (nat) { + nat->masq_index = out->ifindex; + if (!nat->range) + nat->range = kmalloc(sizeof(*nat->range), 0); + memcpy(nat->range, &newrange, sizeof(*nat->range)); + } /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c index eae05c178336..dc6870ca2b71 100644 --- a/net/netfilter/xt_MASQUERADE.c +++ b/net/netfilter/xt_MASQUERADE.c @@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@xxxxxxxxxxxxx>"); MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); /* FIXME: Multiple targets. 
--RR */ -static int masquerade_tg_check(const struct xt_tgchk_param *par) +static int masquerade_tg_check_v0(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; @@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par) return nf_ct_netns_get(par->net, par->family); } +static int masquerade_tg_check_v1(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range2 *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + return nf_ct_netns_get(par->net, par->family); +} + static unsigned int -masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) +masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_nat_range2 range; const struct nf_nat_ipv4_multi_range_compat *mr; @@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) xt_out(par)); } +static unsigned int +masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range2 *range = par->targinfo; + + return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range, + xt_out(par)); +} + static void masquerade_tg_destroy(const struct xt_tgdtor_param *par) { nf_ct_netns_put(par->net, par->family); @@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = { { #if IS_ENABLED(CONFIG_IPV6) .name = "MASQUERADE", + .revision = 0, .family = NFPROTO_IPV6, .target = masquerade_tg6, .targetsize = sizeof(struct nf_nat_range), @@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = { }, { #endif .name = "MASQUERADE", + .revision = 0, .family = NFPROTO_IPV4, - .target = masquerade_tg, + .target = masquerade_tg_v0, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = 1 << NF_INET_POST_ROUTING, - .checkentry = masquerade_tg_check, + .checkentry = masquerade_tg_check_v0, .destroy = masquerade_tg_destroy, 
.me = THIS_MODULE, - } + }, + { + .name = "MASQUERADE", + .revision = 1, + .family = NFPROTO_IPV4, + .target = masquerade_tg_v1, + .targetsize = sizeof(struct nf_nat_range2), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .checkentry = masquerade_tg_check_v1, + .destroy = masquerade_tg_destroy, + .me = THIS_MODULE, + }, }; static int __init masquerade_tg_init(void) -- 2.32.0