[PATCH net-next 3/3] ipv4: ICMP packet inspection for multipath

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



ICMP error packets are inspected so that they are routed together with the
flow they belong to, allowing anycast environments to work with ECMP.

Signed-off-by: Peter Nørlund <pch@xxxxxxxxxxxx>
---
 net/ipv4/icmp.c  | 27 ++++++++++++++++++-
 net/ipv4/route.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3abcfea..20f1d5e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -447,6 +447,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 {
 	struct rtable *rt, *rt2;
 	struct flowi4 fl4_dec;
+	struct flowi4 mp_flow;
 	int err;
 
 	memset(fl4, 0, sizeof(*fl4));
@@ -459,7 +460,31 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	fl4->fl4_icmp_type = type;
 	fl4->fl4_icmp_code = code;
 	security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
-	rt = __ip_route_output_key(net, fl4, NULL);
+
+	/* Source and destination are swapped. See ip_multipath_flow */
+	mp_flow.saddr = iph->daddr;
+	mp_flow.daddr = iph->saddr;
+	mp_flow.flowi4_proto = iph->protocol;
+	mp_flow.fl4_sport = 0;
+	mp_flow.fl4_dport = 0;
+	if (!ip_is_fragment(iph)) {
+		if (iph->protocol == IPPROTO_TCP ||
+		    iph->protocol == IPPROTO_UDP ||
+		    iph->protocol == IPPROTO_SCTP) {
+			__be16 _ports[2];
+			const __be16 *ports;
+
+			ports = skb_header_pointer(skb_in, iph->ihl * 4,
+						   sizeof(_ports),
+						   &_ports);
+			if (ports) {
+				mp_flow.fl4_sport = ports[1];
+				mp_flow.fl4_dport = ports[0];
+			}
+		}
+	}
+
+	rt = __ip_route_output_key(net, fl4, &mp_flow);
 	if (IS_ERR(rt))
 		return rt;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a1ec62c..bab4318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1635,31 +1635,83 @@ out:
 /* Fill flow key data based on packet for use in multipath routing. */
 static void ip_multipath_flow(const struct sk_buff *skb, struct flowi4 *flow)
 {
-	const struct iphdr *iph;
-
-	iph = ip_hdr(skb);
-
-	flow->saddr = iph->saddr;
-	flow->daddr = iph->daddr;
-	flow->flowi4_proto = iph->protocol;
+	struct icmphdr _icmph;
+	struct iphdr _inner_iph;
+	const struct iphdr *outer_iph;
+	const struct icmphdr *icmph;
+	const struct iphdr *inner_iph;
+	unsigned int offset;
+	__be16 _ports[2];
+	const __be16 *ports;
+
+	outer_iph = ip_hdr(skb);
+
+	flow->saddr = outer_iph->saddr;
+	flow->daddr = outer_iph->daddr;
+	flow->flowi4_proto = outer_iph->protocol;
 	flow->fl4_sport = 0;
 	flow->fl4_dport = 0;
 
-	if (unlikely(ip_is_fragment(iph)))
+	if (unlikely(ip_is_fragment(outer_iph)))
 		return;
 
-	if (iph->protocol == IPPROTO_TCP ||
-	    iph->protocol == IPPROTO_UDP ||
-	    iph->protocol == IPPROTO_SCTP) {
-		__be16 _ports;
-		const __be16 *ports;
+	offset = outer_iph->ihl * 4;
 
-		ports = skb_header_pointer(skb, iph->ihl * 4, sizeof(_ports),
+	if (outer_iph->protocol == IPPROTO_TCP ||
+	    outer_iph->protocol == IPPROTO_UDP ||
+	    outer_iph->protocol == IPPROTO_SCTP) {
+		ports = skb_header_pointer(skb, offset, sizeof(_ports),
 					   &_ports);
 		if (ports) {
 			flow->fl4_sport = ports[0];
 			flow->fl4_dport = ports[1];
 		}
+
+		return;
+	}
+
+	if (outer_iph->protocol != IPPROTO_ICMP)
+		return;
+
+	icmph = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (!icmph)
+		return;
+
+	if (icmph->type != ICMP_DEST_UNREACH &&
+	    icmph->type != ICMP_SOURCE_QUENCH &&
+	    icmph->type != ICMP_REDIRECT &&
+	    icmph->type != ICMP_TIME_EXCEEDED &&
+	    icmph->type != ICMP_PARAMETERPROB) {
+		return;
+	}
+
+	offset += sizeof(_icmph);
+	inner_iph = skb_header_pointer(skb, offset, sizeof(_inner_iph),
+				       &_inner_iph);
+	if (!inner_iph)
+		return;
+
+	/* The ICMP payload quotes a packet sent by the current recipient,
+	 * so swap source and destination addresses and ports to get its flow
+	 */
+	flow->saddr = inner_iph->daddr;
+	flow->daddr = inner_iph->saddr;
+	flow->flowi4_proto = inner_iph->protocol;
+
+	if (unlikely(ip_is_fragment(inner_iph)))
+		return;
+
+	if (inner_iph->protocol != IPPROTO_TCP &&
+	    inner_iph->protocol != IPPROTO_UDP &&
+	    inner_iph->protocol != IPPROTO_SCTP) {
+		return;
+	}
+
+	offset += inner_iph->ihl * 4;
+	ports = skb_header_pointer(skb, offset, sizeof(_ports), &_ports);
+	if (ports) {
+		flow->fl4_sport = ports[1];
+		flow->fl4_dport = ports[0];
 	}
 }
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-api" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux