Hello,
On Wed, 27 Aug 2014, Alex Gartrell wrote:
> At Facebook we use ipip forwarding to deliver packets from our layer 4 ipvs
> load balancers to our layer 7 proxies. Today these layer 7 proxies are all
> dual stacked, so we can simply send v4 over v4 and v6 over v6. In the
> future, we're going to have v6-only layer 7 load balancers (no internal v4
> address). To deal with this, we'll forward v4 packets in v6 tunnels. This
> patchset introduces the necessary functionality into ipvs.
>
> The noteworthy limitation of this is that it is not compatible with state
> synchronization, so great care is taken to keep these things mutually
> exclusive.
>
> This patchset includes changes that add an additional netlink attribute to
> destinations and changes that plumb the destination address family through
> parts of the code where it was assumed that it was the same as the service.
> Finally, there's a change that updates the transmit functions for tunneling
> to share common code and support v4 in v6 and vice versa.
>
> Changes for v2:
>
> Introduced crosses_local_route_boundary and update_pmtu functions and
> conditionally do the ip_hdr operations. The latter means that df will be
> effectively zero when we forward an ipv6 packet over an ipv4 tunnel.
>
> Additionally, I addressed Julian's other statements.
>
> Changes for v3:
>
> added back the first two patches
> checkpatch.pl all of the things
> pull out the mtu changes
> other previously detailed fixes
I think, we are almost at the final:
Patch 5:
- we can mix patch 5 and 6?
Patch 7:
- I guess addr_type var should be in the
'if (skb_af == AF_INET6) {' block. So, do not
forget to compile next time with CONFIG_IP_VS_IPV6=n
Patch 9:
- for ensure_mtu_is_adequate():
- move 'struct netns_ipvs *ipvs' below, where ipvs is
assigned
- move 'struct net *net' below, where net is assigned
Patch 10:
- 'is'->'if' in
/* We only care about the df field is sysctl_pmtu_disc(ipvs) is set */
After checking the cp->daddr usage I prepared
a patch that we should insert somewhere before the last one.
Attached. Later we have to wait
"ipvs: properly declare tunnel encapsulation" to appear in net-next.
Regards
--
Julian Anastasov <ja@xxxxxx>
From ebfbe5cd7dc929a03ef078d76fa859231273c22d Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@xxxxxx>
Date: Thu, 28 Aug 2014 17:51:03 +0300
Subject: [PATCH] ipvs: use the new dest addr family field
Use the new address family field cp->daf when printing
cp->daddr in logs or connection listing.
Signed-off-by: Julian Anastasov <ja@xxxxxx>
---
net/netfilter/ipvs/ip_vs_conn.c | 46 ++++++++++++++++++++++++++---------
net/netfilter/ipvs/ip_vs_core.c | 6 ++---
net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +-
4 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 13e9cee..9067c1e 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/in.h>
+#include <linux/inet.h>
#include <linux/net.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -588,7 +589,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -685,7 +686,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -754,7 +755,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
ntohs(ct->cport),
IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
ntohs(ct->vport),
- IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+ IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
ntohs(ct->dport));
/*
@@ -1051,6 +1052,9 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
+#ifdef CONFIG_IP_VS_IPV6
+ char dbuf[INET6_ADDRSTRLEN];
+#endif
if (!ip_vs_conn_net_eq(cp, net))
return 0;
@@ -1065,24 +1069,32 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
pe_data[len] = '\0';
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
- "%pI6 %04X %-11s %7lu%s\n",
+ "%s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
- " %08X %04X %-11s %7lu%s\n",
+ " %s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
@@ -1120,6 +1132,9 @@ static const char *ip_vs_origin_name(unsigned int flags)
static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
{
+#ifdef CONFIG_IP_VS_IPV6
+ char dbuf[INET6_ADDRSTRLEN];
+#endif
if (v == SEQ_START_TOKEN)
seq_puts(seq,
@@ -1132,12 +1147,21 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
return 0;
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
- seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
+ seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
@@ -1145,11 +1169,11 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
- "%08X %04X %-11s %-6s %7lu\n",
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 0cf952a..5cc2dc36 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -492,9 +492,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
- IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
- IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 2f7ea75..5b84c0b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -432,7 +432,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index e3a6972..8e92beb 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -510,7 +510,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
th->fin ? 'F' : '.',
th->ack ? 'A' : '.',
th->rst ? 'R' : '.',
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
--
1.9.0