On Dec 19, 2007 6:00 PM, Damien Thébault <damien.thebault@xxxxxxxxx> wrote: > Hello, > > I sent the quoted mail to linux-net with my problem yesterday, but I > did a git bisect today and I got the following output : > > > 2bf540b73ed5b304e84bb4d4c390d49d1cfa0ef8 is first bad commit > > commit 2bf540b73ed5b304e84bb4d4c390d49d1cfa0ef8 > > Author: Patrick McHardy <kaber@xxxxxxxxx> > > Date: Wed Dec 13 16:54:25 2006 -0800 > > > > [NETFILTER]: bridge-netfilter: remove deferred hooks > > > > Remove the deferred hooks and all related code as scheduled in > > feature-removal-schedule. > > > > Signed-off-by: Patrick McHardy <kaber@xxxxxxxxx> > > Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx> > > > > :040000 040000 c49ea947455937566b6129991dde5e86f2453aae 6611736ce5c0fcde7627494b66b9ea94e37ea42e M Documentation > > :040000 040000 d0dd0700fe68f98b52687be3a0c31d73f7b15b81 f8ddf15a0389c5f5b7f2c11d7d0db039a660e1d5 M include > > :040000 040000 dafccf7ff8657be9adca6b28dbd365cdd6c01ca5 3eeb1cb4b16cc5cb698ab559b47ea6b0991d4d3a M net > This morning I reverted the patch and ported it to work with the net-2.6.25 and 2.6.23 kernels. With it, the behaviour seems to be good again (I didn't test a lot so I don't know if anything else is broken by this). I don't know yet if it solves my RTSP conntrack problem too. (Yes I know this is not really the good way to handle this, but since the removal of the deferred hooks before 2.6.20, there was a lot of changes in this area, so I just tried to see if it was working with the current kernel) I'm attaching the two patchs if anyone needs it. -- Damien Thebault
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 0ae682b..919f331 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -181,6 +181,22 @@ Who: Nick Piggin <npiggin@xxxxxxx> --------------------------- +What: Bridge netfilter deferred IPv4/IPv6 output hook calling +When: January 2007 +Why: The deferred output hooks are a layering violation causing unusual + and broken behaviour on bridge devices. Examples of things they + break include QoS classifation using the MARK or CLASSIFY targets, + the IPsec policy match and connection tracking with VLANs on a + bridge. Their only use is to enable bridge output port filtering + within iptables with the physdev match, which can also be done by + combining iptables and ebtables using netfilter marks. Until it + will get removed the hook deferral is disabled by default and is + only enabled when needed. + +Who: Patrick McHardy <kaber@xxxxxxxxx> + +--------------------------- + What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment When: October 2008 Why: The stacking of class devices makes these values misleading and diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 499aa93..d9487b9 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -83,6 +83,7 @@ struct bridge_skb_cb { } daddr; }; +extern int brnf_deferred_hooks; #else #define nf_bridge_maybe_copy_header(skb) (0) #define nf_bridge_pad(skb) (0) diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 9a10092..b96952a 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -36,6 +36,7 @@ #define NFC_IP_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP_PROTO_UNKNOWN 0x2000 +#endif /* ! __KERNEL__ */ /* IP Hooks */ /* After promisc drops, checksum checks. */ @@ -49,7 +50,6 @@ /* Packets about to hit the wire. */ #define NF_IP_POST_ROUTING 4 #define NF_IP_NUMHOOKS 5 -#endif /* ! __KERNEL__ */ enum nf_ip_hook_priorities { NF_IP_PRI_FIRST = INT_MIN, @@ -57,8 +57,10 @@ enum nf_ip_hook_priorities { NF_IP_PRI_RAW = -300, NF_IP_PRI_SELINUX_FIRST = -225, NF_IP_PRI_CONNTRACK = -200, + NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175, NF_IP_PRI_MANGLE = -150, NF_IP_PRI_NAT_DST = -100, + NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50, NF_IP_PRI_FILTER = 0, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 3475a65..07133c9 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -40,6 +40,7 @@ #define NFC_IP6_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP6_PROTO_UNKNOWN 0x2000 +#endif /* ! __KERNEL__ */ /* IP6 Hooks */ /* After promisc drops, checksum checks. */ @@ -53,7 +54,6 @@ /* Packets about to hit the wire. */ #define NF_IP6_POST_ROUTING 4 #define NF_IP6_NUMHOOKS 5 -#endif /* ! __KERNEL__ */ enum nf_ip6_hook_priorities { @@ -61,8 +61,10 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_CONNTRACK_DEFRAG = -400, NF_IP6_PRI_SELINUX_FIRST = -225, NF_IP6_PRI_CONNTRACK = -200, + NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175, NF_IP6_PRI_MANGLE = -150, NF_IP6_PRI_NAT_DST = -100, + NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50, NF_IP6_PRI_FILTER = 0, NF_IP6_PRI_NAT_SRC = 100, NF_IP6_PRI_SELINUX_LAST = 225, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 32ac035..86131b4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -65,6 +65,9 @@ static int brnf_filter_pppoe_tagged __read_mostly = 1; #define brnf_filter_pppoe_tagged 1 #endif +int brnf_deferred_hooks; +EXPORT_SYMBOL_GPL(brnf_deferred_hooks); + static inline __be16 vlan_proto(const struct sk_buff *skb) { return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -689,46 +692,109 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -/* PF_BRIDGE/LOCAL_OUT *********************************************** - * - * This function sees both locally originated IP packets and forwarded +/* PF_BRIDGE/LOCAL_OUT ***********************************************/ +static int br_nf_local_out_finish(struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->network_header -= VLAN_HLEN; + } + + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish, NF_BR_PRI_FIRST + 1); + + return 0; +} + +/* This function sees both locally originated IP packets and forwarded * IP packets (in both cases the destination device is a bridge * device). It also sees bridged-and-DNAT'ed packets. + * To be able to filter on the physical bridge devices (with the physdev + * module), we steal packets destined to a bridge device away from the + * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, + * when we have determined the real output device. This is done in here. * * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. - */ + * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched + * this packet before, and so the packet was locally originated. We fake + * the PF_INET/LOCAL_OUT hook. + * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, + * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure + * even routed packets that didn't arrive on a bridge interface have their + * nf_bridge->physindev set. */ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct net_device *realindev; + struct net_device *realindev, *realoutdev; struct nf_bridge_info *nf_bridge; + int pf; if (!skb->nf_bridge) return NF_ACCEPT; + if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) + pf = PF_INET; + else + pf = PF_INET6; + nf_bridge = skb->nf_bridge; - if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) - return NF_ACCEPT; + nf_bridge->physoutdev = skb->dev; + realindev = nf_bridge->physindev; /* Bridged, take PF_BRIDGE/FORWARD. * (see big note in front of br_nf_pre_routing_finish) */ - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; + if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + if (skb->protocol == htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->network_header -= VLAN_HLEN; + } - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, + skb->dev, br_forward_finish); + goto out; } - nf_bridge_push_encap_header(skb); + realoutdev = bridge_parent(skb->dev); + if (!realoutdev) + return NF_DROP; + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* iptables should match -o br0.x */ + if (nf_bridge->netoutdev) + realoutdev = nf_bridge->netoutdev; +#endif + if (skb->protocol == htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->network_header += VLAN_HLEN; + } + /* IP forwarded traffic has a physindev, locally + * generated traffic hasn't. */ + if (realindev != NULL) { + if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) { + struct net_device *parent = bridge_parent(realindev); + if (parent) + realindev = parent; + } - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, - br_forward_finish); + NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, + realoutdev, br_nf_local_out_finish, + NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); + } else { + NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev, + realoutdev, br_nf_local_out_finish, + NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); + } + +out: return NF_STOLEN; } @@ -834,6 +900,67 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; } +/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT + * and PF_INET(6)/POST_ROUTING until we have done the forwarding + * decision in the bridge code and have determined nf_bridge->physoutdev. */ +static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if ((out->hard_start_xmit == br_dev_xmit && + okfn != br_nf_forward_finish && + okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit) +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + || ((out->priv_flags & IFF_802_1Q_VLAN) && + VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) +#endif + ) { + struct nf_bridge_info *nf_bridge; + + if (!skb->nf_bridge) { +#ifdef CONFIG_SYSCTL + /* This code is executed while in the IP(v6) stack, + the version should be 4 or 6. We can't use + skb->protocol because that isn't set on + PF_INET(6)/LOCAL_OUT. */ + struct iphdr *ip = ip_hdr(skb); + + if (ip->version == 4 && !brnf_call_iptables) + return NF_ACCEPT; + else if (ip->version == 6 && !brnf_call_ip6tables) + return NF_ACCEPT; + else if (!brnf_deferred_hooks) + return NF_ACCEPT; +#endif + if (hook == NF_IP_POST_ROUTING) + return NF_ACCEPT; + if (!nf_bridge_alloc(skb)) + return NF_DROP; + } + + nf_bridge = skb->nf_bridge; + + /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we + * will need the indev then. For a brouter, the real indev + * can be a bridge port, so we make sure br_nf_local_out() + * doesn't use the bridge parent of the indev by using + * the BRNF_DONT_TAKE_PARENT mask. */ + if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { + nf_bridge->mask |= BRNF_DONT_TAKE_PARENT; + nf_bridge->physindev = (struct net_device *)in; + } +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* the iptables outdev is br0.x, not br0 */ + if (out->priv_flags & IFF_802_1Q_VLAN) + nf_bridge->netoutdev = (struct net_device *)out; +#endif + return NF_STOP; + } + + return NF_ACCEPT; +} + /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because @@ -879,6 +1006,36 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .pf = PF_INET6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_FORWARD, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_FORWARD, + .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_FIRST, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_POST_ROUTING, + .priority = NF_IP6_PRI_FIRST, }, }; #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 678b683..6e7eb9b 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -104,16 +104,20 @@ physdev_mt_check(const char *tablename, const void *ip, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return false; - if (info->bitmask & XT_PHYSDEV_OP_OUT && + if (brnf_deferred_hooks == 0 && + info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { printk(KERN_WARNING "physdev match: using --physdev-out in the " "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " - "traffic is not supported anymore.\n"); - if (hook_mask & (1 << NF_INET_LOCAL_OUT)) - return false; + "traffic is deprecated and breaks other things, it will " + "be removed in January 2007. See Documentation/" + "feature-removal-schedule.txt for details. This doesn't " + "affect you in case you're using it for purely bridged " + "traffic.\n"); + brnf_deferred_hooks = 1; } return true; }
Index: Documentation/feature-removal-schedule.txt =================================================================== --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -154,6 +154,22 @@ --------------------------- +What: Bridge netfilter deferred IPv4/IPv6 output hook calling +When: January 2007 +Why: The deferred output hooks are a layering violation causing unusual + and broken behaviour on bridge devices. Examples of things they + break include QoS classifation using the MARK or CLASSIFY targets, + the IPsec policy match and connection tracking with VLANs on a + bridge. Their only use is to enable bridge output port filtering + within iptables with the physdev match, which can also be done by + combining iptables and ebtables using netfilter marks. Until it + will get removed the hook deferral is disabled by default and is + only enabled when needed. + +Who: Patrick McHardy <kaber@xxxxxxxxx> + +--------------------------- + What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment When: October 2008 Why: The stacking of class devices makes these values misleading and Index: include/linux/netfilter_bridge.h =================================================================== --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -82,6 +82,7 @@ } daddr; }; +extern int brnf_deferred_hooks; #else #define nf_bridge_maybe_copy_header(skb) (0) #define nf_bridge_pad(skb) (0) Index: include/linux/netfilter_ipv4.h =================================================================== --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -57,8 +57,10 @@ NF_IP_PRI_RAW = -300, NF_IP_PRI_SELINUX_FIRST = -225, NF_IP_PRI_CONNTRACK = -200, + NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175, NF_IP_PRI_MANGLE = -150, NF_IP_PRI_NAT_DST = -100, + NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50, NF_IP_PRI_FILTER = 0, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, Index: include/linux/netfilter_ipv6.h =================================================================== --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -62,8 +62,10 @@ NF_IP6_PRI_CONNTRACK_DEFRAG = -400, NF_IP6_PRI_SELINUX_FIRST = -225, NF_IP6_PRI_CONNTRACK = -200, + NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175, NF_IP6_PRI_MANGLE = -150, NF_IP6_PRI_NAT_DST = -100, + NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50, NF_IP6_PRI_FILTER = 0, NF_IP6_PRI_NAT_SRC = 100, NF_IP6_PRI_SELINUX_LAST = 225, Index: net/bridge/br_netfilter.c =================================================================== --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -65,6 +65,9 @@ #define brnf_filter_pppoe_tagged 1 #endif +int brnf_deferred_hooks; +EXPORT_SYMBOL_GPL(brnf_deferred_hooks); + static inline __be16 vlan_proto(const struct sk_buff *skb) { return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -697,47 +700,110 @@ return NF_STOLEN; } -/* PF_BRIDGE/LOCAL_OUT *********************************************** - * - * This function sees both locally originated IP packets and forwarded +/* PF_BRIDGE/LOCAL_OUT ***********************************************/ +static int br_nf_local_out_finish(struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->network_header -= VLAN_HLEN; + } + + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish, NF_BR_PRI_FIRST + 1); + + return 0; +} + +/* This function sees both locally originated IP packets and forwarded * IP packets (in both cases the destination device is a bridge * device). It also sees bridged-and-DNAT'ed packets. + * To be able to filter on the physical bridge devices (with the physdev + * module), we steal packets destined to a bridge device away from the + * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, + * when we have determined the real output device. This is done in here. * * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. - */ + * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched + * this packet before, and so the packet was locally originated. We fake + * the PF_INET/LOCAL_OUT hook. + * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, + * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure + * even routed packets that didn't arrive on a bridge interface have their + * nf_bridge->physindev set. */ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct net_device *realindev; + struct net_device *realindev, *realoutdev; struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; + int pf; if (!skb->nf_bridge) return NF_ACCEPT; + if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) + pf = PF_INET; + else + pf = PF_INET6; + nf_bridge = skb->nf_bridge; - if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) - return NF_ACCEPT; + nf_bridge->physoutdev = skb->dev; + realindev = nf_bridge->physindev; /* Bridged, take PF_BRIDGE/FORWARD. * (see big note in front of br_nf_pre_routing_finish) */ - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; + if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + if (skb->protocol == htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->network_header -= VLAN_HLEN; + } - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, + skb->dev, br_forward_finish); + goto out; } - nf_bridge_push_encap_header(skb); + realoutdev = bridge_parent(skb->dev); + if (!realoutdev) + return NF_DROP; + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* iptables should match -o br0.x */ + if (nf_bridge->netoutdev) + realoutdev = nf_bridge->netoutdev; +#endif + if (skb->protocol == htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->network_header += VLAN_HLEN; + } + /* IP forwarded traffic has a physindev, locally + * generated traffic hasn't. */ + if (realindev != NULL) { + if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) { + struct net_device *parent = bridge_parent(realindev); + if (parent) + realindev = parent; + } - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, - br_forward_finish); + NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, + realoutdev, br_nf_local_out_finish, + NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); + } else { + NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev, + realoutdev, br_nf_local_out_finish, + NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); + } + +out: return NF_STOLEN; } @@ -841,6 +907,69 @@ return NF_ACCEPT; } +/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT + * and PF_INET(6)/POST_ROUTING until we have done the forwarding + * decision in the bridge code and have determined nf_bridge->physoutdev. */ +static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + + if ((out->hard_start_xmit == br_dev_xmit && + okfn != br_nf_forward_finish && + okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit) +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + || ((out->priv_flags & IFF_802_1Q_VLAN) && + VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) +#endif + ) { + struct nf_bridge_info *nf_bridge; + + if (!skb->nf_bridge) { +#ifdef CONFIG_SYSCTL + /* This code is executed while in the IP(v6) stack, + the version should be 4 or 6. We can't use + skb->protocol because that isn't set on + PF_INET(6)/LOCAL_OUT. */ + struct iphdr *ip = ip_hdr(skb); + + if (ip->version == 4 && !brnf_call_iptables) + return NF_ACCEPT; + else if (ip->version == 6 && !brnf_call_ip6tables) + return NF_ACCEPT; + else if (!brnf_deferred_hooks) + return NF_ACCEPT; +#endif + if (hook == NF_IP_POST_ROUTING) + return NF_ACCEPT; + if (!nf_bridge_alloc(skb)) + return NF_DROP; + } + + nf_bridge = skb->nf_bridge; + + /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we + * will need the indev then. For a brouter, the real indev + * can be a bridge port, so we make sure br_nf_local_out() + * doesn't use the bridge parent of the indev by using + * the BRNF_DONT_TAKE_PARENT mask. */ + if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { + nf_bridge->mask |= BRNF_DONT_TAKE_PARENT; + nf_bridge->physindev = (struct net_device *)in; + } +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* the iptables outdev is br0.x, not br0 */ + if (out->priv_flags & IFF_802_1Q_VLAN) + nf_bridge->netoutdev = (struct net_device *)out; +#endif + return NF_STOP; + } + + return NF_ACCEPT; +} + /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because @@ -886,6 +1013,36 @@ .pf = PF_INET6, .hooknum = NF_IP6_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_FORWARD, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_FORWARD, + .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_FIRST, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_POST_ROUTING, + .priority = NF_IP6_PRI_FIRST, }, }; #ifdef CONFIG_SYSCTL Index: net/netfilter/xt_physdev.c =================================================================== --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -110,16 +110,20 @@ if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return false; - if (info->bitmask & XT_PHYSDEV_OP_OUT && + if (brnf_deferred_hooks == 0 && + info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | (1 << NF_IP_POST_ROUTING))) { printk(KERN_WARNING "physdev match: using --physdev-out in the " "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " - "traffic is not supported anymore.\n"); - if (hook_mask & (1 << NF_IP_LOCAL_OUT)) - return false; + "traffic is deprecated and breaks other things, it will " + "be removed in January 2007. See Documentation/" + "feature-removal-schedule.txt for details. This doesn't " + "affect you in case you're using it for purely bridged " + "traffic.\n"); + brnf_deferred_hooks = 1; } return true; }