Re: conntrack doesn't always work when a bridge is used

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Dec 19, 2007 6:00 PM, Damien Thébault <damien.thebault@xxxxxxxxx> wrote:
> Hello,
>
> I sent the quoted mail to linux-net with my problem yesterday, but I
> did a git bisect today and I got the following output :
>
> > 2bf540b73ed5b304e84bb4d4c390d49d1cfa0ef8 is first bad commit
> > commit 2bf540b73ed5b304e84bb4d4c390d49d1cfa0ef8
> > Author: Patrick McHardy <kaber@xxxxxxxxx>
> > Date:   Wed Dec 13 16:54:25 2006 -0800
> >
> >     [NETFILTER]: bridge-netfilter: remove deferred hooks
> >
> >     Remove the deferred hooks and all related code as scheduled in
> >     feature-removal-schedule.
> >
> >     Signed-off-by: Patrick McHardy <kaber@xxxxxxxxx>
> >     Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
> >
> > :040000 040000 c49ea947455937566b6129991dde5e86f2453aae 6611736ce5c0fcde7627494b66b9ea94e37ea42e M      Documentation
> > :040000 040000 d0dd0700fe68f98b52687be3a0c31d73f7b15b81 f8ddf15a0389c5f5b7f2c11d7d0db039a660e1d5 M      include
> > :040000 040000 dafccf7ff8657be9adca6b28dbd365cdd6c01ca5 3eeb1cb4b16cc5cb698ab559b47ea6b0991d4d3a M      net
>

This morning I reverted that commit and ported the revert to the
net-2.6.25 and 2.6.23 kernels. With the revert applied, the behaviour
seems to be correct again (I have not tested much, so I don't know
whether anything else is broken by it).
I don't know yet if it solves my RTSP conntrack problem too.

(Yes, I know this is not really the right way to handle this, but since
the deferred hooks were removed before 2.6.20 there have been a lot of
changes in this area, so I just wanted to see whether the old code
still works with the current kernel.)

I'm attaching the two patches in case anyone needs them.
-- 
Damien Thebault
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 0ae682b..919f331 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -181,6 +181,22 @@ Who:	Nick Piggin <npiggin@xxxxxxx>
 
 ---------------------------
 
+What:	Bridge netfilter deferred IPv4/IPv6 output hook calling
+When:	January 2007
+Why:	The deferred output hooks are a layering violation causing unusual
+	and broken behaviour on bridge devices. Examples of things they
+	break include QoS classifation using the MARK or CLASSIFY targets,
+	the IPsec policy match and connection tracking with VLANs on a
+	bridge. Their only use is to enable bridge output port filtering
+	within iptables with the physdev match, which can also be done by
+	combining iptables and ebtables using netfilter marks. Until it
+	will get removed the hook deferral is disabled by default and is
+	only enabled when needed.
+
+Who:	Patrick McHardy <kaber@xxxxxxxxx>
+
+---------------------------
+
 What:	PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
 When:	October 2008
 Why:	The stacking of class devices makes these values misleading and
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 499aa93..d9487b9 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -83,6 +83,7 @@ struct bridge_skb_cb {
 	} daddr;
 };
 
+extern int brnf_deferred_hooks;
 #else
 #define nf_bridge_maybe_copy_header(skb)	(0)
 #define nf_bridge_pad(skb)			(0)
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 9a10092..b96952a 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -36,6 +36,7 @@
 #define NFC_IP_DST_PT		0x0400
 /* Something else about the proto */
 #define NFC_IP_PROTO_UNKNOWN	0x2000
+#endif /* ! __KERNEL__ */
 
 /* IP Hooks */
 /* After promisc drops, checksum checks. */
@@ -49,7 +50,6 @@
 /* Packets about to hit the wire. */
 #define NF_IP_POST_ROUTING	4
 #define NF_IP_NUMHOOKS		5
-#endif /* ! __KERNEL__ */
 
 enum nf_ip_hook_priorities {
 	NF_IP_PRI_FIRST = INT_MIN,
@@ -57,8 +57,10 @@ enum nf_ip_hook_priorities {
 	NF_IP_PRI_RAW = -300,
 	NF_IP_PRI_SELINUX_FIRST = -225,
 	NF_IP_PRI_CONNTRACK = -200,
+	NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
 	NF_IP_PRI_MANGLE = -150,
 	NF_IP_PRI_NAT_DST = -100,
+	NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
 	NF_IP_PRI_FILTER = 0,
 	NF_IP_PRI_NAT_SRC = 100,
 	NF_IP_PRI_SELINUX_LAST = 225,
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 3475a65..07133c9 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -40,6 +40,7 @@
 #define NFC_IP6_DST_PT           0x0400
 /* Something else about the proto */
 #define NFC_IP6_PROTO_UNKNOWN    0x2000
+#endif /* ! __KERNEL__ */
 
 /* IP6 Hooks */
 /* After promisc drops, checksum checks. */
@@ -53,7 +54,6 @@
 /* Packets about to hit the wire. */
 #define NF_IP6_POST_ROUTING	4
 #define NF_IP6_NUMHOOKS		5
-#endif /* ! __KERNEL__ */
 
 
 enum nf_ip6_hook_priorities {
@@ -61,8 +61,10 @@ enum nf_ip6_hook_priorities {
 	NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
 	NF_IP6_PRI_SELINUX_FIRST = -225,
 	NF_IP6_PRI_CONNTRACK = -200,
+	NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
 	NF_IP6_PRI_MANGLE = -150,
 	NF_IP6_PRI_NAT_DST = -100,
+	NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
 	NF_IP6_PRI_FILTER = 0,
 	NF_IP6_PRI_NAT_SRC = 100,
 	NF_IP6_PRI_SELINUX_LAST = 225,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 32ac035..86131b4 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -65,6 +65,9 @@ static int brnf_filter_pppoe_tagged __read_mostly = 1;
 #define brnf_filter_pppoe_tagged 1
 #endif
 
+int brnf_deferred_hooks;
+EXPORT_SYMBOL_GPL(brnf_deferred_hooks);
+
 static inline __be16 vlan_proto(const struct sk_buff *skb)
 {
 	return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -689,46 +692,109 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-/* PF_BRIDGE/LOCAL_OUT ***********************************************
- *
- * This function sees both locally originated IP packets and forwarded
+/* PF_BRIDGE/LOCAL_OUT ***********************************************/
+static int br_nf_local_out_finish(struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		skb_push(skb, VLAN_HLEN);
+		skb->network_header -= VLAN_HLEN;
+	}
+
+	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+		       br_forward_finish, NF_BR_PRI_FIRST + 1);
+
+	return 0;
+}
+
+/* This function sees both locally originated IP packets and forwarded
  * IP packets (in both cases the destination device is a bridge
  * device). It also sees bridged-and-DNAT'ed packets.
+ * To be able to filter on the physical bridge devices (with the physdev
+ * module), we steal packets destined to a bridge device away from the
+ * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later,
+ * when we have determined the real output device. This is done in here.
  *
  * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
  * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
  * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
  * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
  * will be executed.
- */
+ * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched
+ * this packet before, and so the packet was locally originated. We fake
+ * the PF_INET/LOCAL_OUT hook.
+ * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed,
+ * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure
+ * even routed packets that didn't arrive on a bridge interface have their
+ * nf_bridge->physindev set. */
 static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    int (*okfn)(struct sk_buff *))
 {
-	struct net_device *realindev;
+	struct net_device *realindev, *realoutdev;
 	struct nf_bridge_info *nf_bridge;
+	int pf;
 
 	if (!skb->nf_bridge)
 		return NF_ACCEPT;
 
+	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+		pf = PF_INET;
+	else
+		pf = PF_INET6;
+
 	nf_bridge = skb->nf_bridge;
-	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
-		return NF_ACCEPT;
+	nf_bridge->physoutdev = skb->dev;
+	realindev = nf_bridge->physindev;
 
 	/* Bridged, take PF_BRIDGE/FORWARD.
 	 * (see big note in front of br_nf_pre_routing_finish) */
-	nf_bridge->physoutdev = skb->dev;
-	realindev = nf_bridge->physindev;
+	if (nf_bridge->mask & BRNF_BRIDGED_DNAT) {
+		if (nf_bridge->mask & BRNF_PKT_TYPE) {
+			skb->pkt_type = PACKET_OTHERHOST;
+			nf_bridge->mask ^= BRNF_PKT_TYPE;
+		}
+		if (skb->protocol == htons(ETH_P_8021Q)) {
+			skb_push(skb, VLAN_HLEN);
+			skb->network_header -= VLAN_HLEN;
+		}
 
-	if (nf_bridge->mask & BRNF_PKT_TYPE) {
-		skb->pkt_type = PACKET_OTHERHOST;
-		nf_bridge->mask ^= BRNF_PKT_TYPE;
+		NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
+			skb->dev, br_forward_finish);
+		goto out;
 	}
-	nf_bridge_push_encap_header(skb);
+	realoutdev = bridge_parent(skb->dev);
+	if (!realoutdev)
+		return NF_DROP;
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	/* iptables should match -o br0.x */
+	if (nf_bridge->netoutdev)
+		realoutdev = nf_bridge->netoutdev;
+#endif
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		skb_pull(skb, VLAN_HLEN);
+		skb->network_header += VLAN_HLEN;
+	}
+	/* IP forwarded traffic has a physindev, locally
+	 * generated traffic hasn't. */
+	if (realindev != NULL) {
+		if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
+			struct net_device *parent = bridge_parent(realindev);
+			if (parent)
+				realindev = parent;
+		}
 
-	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
-		br_forward_finish);
+		NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
+			       realoutdev, br_nf_local_out_finish,
+			       NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
+	} else {
+		NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
+			       realoutdev, br_nf_local_out_finish,
+			       NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
+	}
+
+out:
 	return NF_STOLEN;
 }
 
@@ -834,6 +900,67 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
+/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT
+ * and PF_INET(6)/POST_ROUTING until we have done the forwarding
+ * decision in the bridge code and have determined nf_bridge->physoutdev. */
+static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	if ((out->hard_start_xmit == br_dev_xmit &&
+	     okfn != br_nf_forward_finish &&
+	     okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit)
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	    || ((out->priv_flags & IFF_802_1Q_VLAN) &&
+		VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
+#endif
+	    ) {
+		struct nf_bridge_info *nf_bridge;
+
+		if (!skb->nf_bridge) {
+#ifdef CONFIG_SYSCTL
+			/* This code is executed while in the IP(v6) stack,
+			   the version should be 4 or 6. We can't use
+			   skb->protocol because that isn't set on
+			   PF_INET(6)/LOCAL_OUT. */
+			struct iphdr *ip = ip_hdr(skb);
+
+			if (ip->version == 4 && !brnf_call_iptables)
+				return NF_ACCEPT;
+			else if (ip->version == 6 && !brnf_call_ip6tables)
+				return NF_ACCEPT;
+			else if (!brnf_deferred_hooks)
+				return NF_ACCEPT;
+#endif
+			if (hook == NF_IP_POST_ROUTING)
+				return NF_ACCEPT;
+			if (!nf_bridge_alloc(skb))
+				return NF_DROP;
+		}
+
+		nf_bridge = skb->nf_bridge;
+
+		/* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we
+		 * will need the indev then. For a brouter, the real indev
+		 * can be a bridge port, so we make sure br_nf_local_out()
+		 * doesn't use the bridge parent of the indev by using
+		 * the BRNF_DONT_TAKE_PARENT mask. */
+		if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) {
+			nf_bridge->mask |= BRNF_DONT_TAKE_PARENT;
+			nf_bridge->physindev = (struct net_device *)in;
+		}
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+		/* the iptables outdev is br0.x, not br0 */
+		if (out->priv_flags & IFF_802_1Q_VLAN)
+			nf_bridge->netoutdev = (struct net_device *)out;
+#endif
+		return NF_STOP;
+	}
+
+	return NF_ACCEPT;
+}
+
 /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
  * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
  * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
@@ -879,6 +1006,36 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 	  .pf = PF_INET6,
 	  .hooknum = NF_INET_PRE_ROUTING,
 	  .priority = NF_IP6_PRI_FIRST, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_FORWARD,
+	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_FORWARD,
+	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_LOCAL_OUT,
+	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_LOCAL_OUT,
+	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_POST_ROUTING,
+	  .priority = NF_IP_PRI_FIRST, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_POST_ROUTING,
+	  .priority = NF_IP6_PRI_FIRST, },
 };
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 678b683..6e7eb9b 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -104,16 +104,20 @@ physdev_mt_check(const char *tablename, const void *ip,
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
 		return false;
-	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
+	if (brnf_deferred_hooks == 0 &&
+	    info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
 	    hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
 			 (1 << NF_INET_POST_ROUTING))) {
 		printk(KERN_WARNING "physdev match: using --physdev-out in the "
 		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
-		       "traffic is not supported anymore.\n");
-		if (hook_mask & (1 << NF_INET_LOCAL_OUT))
-			return false;
+		       "traffic is deprecated and breaks other things, it will "
+		       "be removed in January 2007. See Documentation/"
+		       "feature-removal-schedule.txt for details. This doesn't "
+		       "affect you in case you're using it for purely bridged "
+		       "traffic.\n");
+		brnf_deferred_hooks = 1;
 	}
 	return true;
 }
Index: Documentation/feature-removal-schedule.txt
===================================================================
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -154,6 +154,22 @@
 
 ---------------------------
 
+What:	Bridge netfilter deferred IPv4/IPv6 output hook calling
+When:	January 2007
+Why:	The deferred output hooks are a layering violation causing unusual
+	and broken behaviour on bridge devices. Examples of things they
+	break include QoS classifation using the MARK or CLASSIFY targets,
+	the IPsec policy match and connection tracking with VLANs on a
+	bridge. Their only use is to enable bridge output port filtering
+	within iptables with the physdev match, which can also be done by
+	combining iptables and ebtables using netfilter marks. Until it
+	will get removed the hook deferral is disabled by default and is
+	only enabled when needed.
+
+Who:	Patrick McHardy <kaber@xxxxxxxxx>
+
+---------------------------
+
 What:	PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
 When:	October 2008
 Why:	The stacking of class devices makes these values misleading and
Index: include/linux/netfilter_bridge.h
===================================================================
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -82,6 +82,7 @@
 	} daddr;
 };
 
+extern int brnf_deferred_hooks;
 #else
 #define nf_bridge_maybe_copy_header(skb)	(0)
 #define nf_bridge_pad(skb)			(0)
Index: include/linux/netfilter_ipv4.h
===================================================================
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -57,8 +57,10 @@
 	NF_IP_PRI_RAW = -300,
 	NF_IP_PRI_SELINUX_FIRST = -225,
 	NF_IP_PRI_CONNTRACK = -200,
+	NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
 	NF_IP_PRI_MANGLE = -150,
 	NF_IP_PRI_NAT_DST = -100,
+	NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
 	NF_IP_PRI_FILTER = 0,
 	NF_IP_PRI_NAT_SRC = 100,
 	NF_IP_PRI_SELINUX_LAST = 225,
Index: include/linux/netfilter_ipv6.h
===================================================================
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -62,8 +62,10 @@
 	NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
 	NF_IP6_PRI_SELINUX_FIRST = -225,
 	NF_IP6_PRI_CONNTRACK = -200,
+	NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
 	NF_IP6_PRI_MANGLE = -150,
 	NF_IP6_PRI_NAT_DST = -100,
+	NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
 	NF_IP6_PRI_FILTER = 0,
 	NF_IP6_PRI_NAT_SRC = 100,
 	NF_IP6_PRI_SELINUX_LAST = 225,
Index: net/bridge/br_netfilter.c
===================================================================
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -65,6 +65,9 @@
 #define brnf_filter_pppoe_tagged 1
 #endif
 
+int brnf_deferred_hooks;
+EXPORT_SYMBOL_GPL(brnf_deferred_hooks);
+
 static inline __be16 vlan_proto(const struct sk_buff *skb)
 {
 	return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -697,47 +700,110 @@
 	return NF_STOLEN;
 }
 
-/* PF_BRIDGE/LOCAL_OUT ***********************************************
- *
- * This function sees both locally originated IP packets and forwarded
+/* PF_BRIDGE/LOCAL_OUT ***********************************************/
+static int br_nf_local_out_finish(struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		skb_push(skb, VLAN_HLEN);
+		skb->network_header -= VLAN_HLEN;
+	}
+
+	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+		       br_forward_finish, NF_BR_PRI_FIRST + 1);
+
+	return 0;
+}
+
+/* This function sees both locally originated IP packets and forwarded
  * IP packets (in both cases the destination device is a bridge
  * device). It also sees bridged-and-DNAT'ed packets.
+ * To be able to filter on the physical bridge devices (with the physdev
+ * module), we steal packets destined to a bridge device away from the
+ * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later,
+ * when we have determined the real output device. This is done in here.
  *
  * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
  * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
  * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
  * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
  * will be executed.
- */
+ * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched
+ * this packet before, and so the packet was locally originated. We fake
+ * the PF_INET/LOCAL_OUT hook.
+ * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed,
+ * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure
+ * even routed packets that didn't arrive on a bridge interface have their
+ * nf_bridge->physindev set. */
 static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    int (*okfn)(struct sk_buff *))
 {
-	struct net_device *realindev;
+	struct net_device *realindev, *realoutdev;
 	struct sk_buff *skb = *pskb;
 	struct nf_bridge_info *nf_bridge;
+	int pf;
 
 	if (!skb->nf_bridge)
 		return NF_ACCEPT;
 
+	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+		pf = PF_INET;
+	else
+		pf = PF_INET6;
+
 	nf_bridge = skb->nf_bridge;
-	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
-		return NF_ACCEPT;
+	nf_bridge->physoutdev = skb->dev;
+	realindev = nf_bridge->physindev;
 
 	/* Bridged, take PF_BRIDGE/FORWARD.
 	 * (see big note in front of br_nf_pre_routing_finish) */
-	nf_bridge->physoutdev = skb->dev;
-	realindev = nf_bridge->physindev;
+	if (nf_bridge->mask & BRNF_BRIDGED_DNAT) {
+		if (nf_bridge->mask & BRNF_PKT_TYPE) {
+			skb->pkt_type = PACKET_OTHERHOST;
+			nf_bridge->mask ^= BRNF_PKT_TYPE;
+		}
+		if (skb->protocol == htons(ETH_P_8021Q)) {
+			skb_push(skb, VLAN_HLEN);
+			skb->network_header -= VLAN_HLEN;
+		}
 
-	if (nf_bridge->mask & BRNF_PKT_TYPE) {
-		skb->pkt_type = PACKET_OTHERHOST;
-		nf_bridge->mask ^= BRNF_PKT_TYPE;
+		NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
+			skb->dev, br_forward_finish);
+		goto out;
 	}
-	nf_bridge_push_encap_header(skb);
+	realoutdev = bridge_parent(skb->dev);
+	if (!realoutdev)
+		return NF_DROP;
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	/* iptables should match -o br0.x */
+	if (nf_bridge->netoutdev)
+		realoutdev = nf_bridge->netoutdev;
+#endif
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		skb_pull(skb, VLAN_HLEN);
+		skb->network_header += VLAN_HLEN;
+	}
+	/* IP forwarded traffic has a physindev, locally
+	 * generated traffic hasn't. */
+	if (realindev != NULL) {
+		if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
+			struct net_device *parent = bridge_parent(realindev);
+			if (parent)
+				realindev = parent;
+		}
 
-	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
-		br_forward_finish);
+		NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
+			       realoutdev, br_nf_local_out_finish,
+			       NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
+	} else {
+		NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
+			       realoutdev, br_nf_local_out_finish,
+			       NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
+	}
+
+out:
 	return NF_STOLEN;
 }
 
@@ -841,6 +907,69 @@
 	return NF_ACCEPT;
 }
 
+/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT
+ * and PF_INET(6)/POST_ROUTING until we have done the forwarding
+ * decision in the bridge code and have determined nf_bridge->physoutdev. */
+static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *skb = *pskb;
+
+	if ((out->hard_start_xmit == br_dev_xmit &&
+	     okfn != br_nf_forward_finish &&
+	     okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit)
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	    || ((out->priv_flags & IFF_802_1Q_VLAN) &&
+		VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
+#endif
+	    ) {
+		struct nf_bridge_info *nf_bridge;
+
+		if (!skb->nf_bridge) {
+#ifdef CONFIG_SYSCTL
+			/* This code is executed while in the IP(v6) stack,
+			   the version should be 4 or 6. We can't use
+			   skb->protocol because that isn't set on
+			   PF_INET(6)/LOCAL_OUT. */
+			struct iphdr *ip = ip_hdr(skb);
+
+			if (ip->version == 4 && !brnf_call_iptables)
+				return NF_ACCEPT;
+			else if (ip->version == 6 && !brnf_call_ip6tables)
+				return NF_ACCEPT;
+			else if (!brnf_deferred_hooks)
+				return NF_ACCEPT;
+#endif
+			if (hook == NF_IP_POST_ROUTING)
+				return NF_ACCEPT;
+			if (!nf_bridge_alloc(skb))
+				return NF_DROP;
+		}
+
+		nf_bridge = skb->nf_bridge;
+
+		/* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we
+		 * will need the indev then. For a brouter, the real indev
+		 * can be a bridge port, so we make sure br_nf_local_out()
+		 * doesn't use the bridge parent of the indev by using
+		 * the BRNF_DONT_TAKE_PARENT mask. */
+		if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) {
+			nf_bridge->mask |= BRNF_DONT_TAKE_PARENT;
+			nf_bridge->physindev = (struct net_device *)in;
+		}
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+		/* the iptables outdev is br0.x, not br0 */
+		if (out->priv_flags & IFF_802_1Q_VLAN)
+			nf_bridge->netoutdev = (struct net_device *)out;
+#endif
+		return NF_STOP;
+	}
+
+	return NF_ACCEPT;
+}
+
 /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
  * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
  * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
@@ -886,6 +1013,36 @@
 	  .pf = PF_INET6,
 	  .hooknum = NF_IP6_PRE_ROUTING,
 	  .priority = NF_IP6_PRI_FIRST, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_FORWARD,
+	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_FORWARD,
+	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_LOCAL_OUT,
+	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_LOCAL_OUT,
+	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_POST_ROUTING,
+	  .priority = NF_IP_PRI_FIRST, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_POST_ROUTING,
+	  .priority = NF_IP6_PRI_FIRST, },
 };
 
 #ifdef CONFIG_SYSCTL
Index: net/netfilter/xt_physdev.c
===================================================================
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -110,16 +110,20 @@
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
 		return false;
-	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
+	if (brnf_deferred_hooks == 0 &&
+	    info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
 	    hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
 			 (1 << NF_IP_POST_ROUTING))) {
 		printk(KERN_WARNING "physdev match: using --physdev-out in the "
 		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
-		       "traffic is not supported anymore.\n");
-		if (hook_mask & (1 << NF_IP_LOCAL_OUT))
-			return false;
+		       "traffic is deprecated and breaks other things, it will "
+		       "be removed in January 2007. See Documentation/"
+		       "feature-removal-schedule.txt for details. This doesn't "
+		       "affect you in case you're using it for purely bridged "
+		       "traffic.\n");
+		brnf_deferred_hooks = 1;
 	}
 	return true;
 }

[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux 802.1Q VLAN]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Git]     [Bugtraq]     [Yosemite News and Information]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux PCI]     [Linux Admin]     [Samba]

  Powered by Linux