From: Vlad Yasevich <vyasevic@xxxxxxxxxx> This patch has been added to the 3.12 stable tree. If you have any objections, please let us know. =============== [ Upstream commit 0d5501c1c828fb97d02af50aa9d2b1a5498b94e4 ] Currently the functionality to untag traffic on input resides as part of the vlan module and is build only when VLAN support is enabled in the kernel. When VLAN is disabled, the function vlan_untag() turns into a stub and doesn't really untag the packets. This seems to create an interesting interaction between VMs supporting checksum offloading and some network drivers. There are some drivers that do not allow the user to change tx-vlan-offload feature of the driver. These drivers also seem to assume that any VLAN-tagged traffic they transmit will have the vlan information in the vlan_tci and not in the vlan header already in the skb. When transmitting skbs that already have tagged data with partial checksum set, the checksum doesn't appear to be updated correctly by the card thus resulting in a failure to establish TCP connections. The following is a packet trace taken on the receiver where a sender is a VM with a VLAN configued. The host VM is running on doest not have VLAN support and the outging interface on the host is tg3: 10:12:43.503055 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27243, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x48d9), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294837885 ecr 0,nop,wscale 7], length 0 10:12:44.505556 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27244, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x44ee), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294838888 ecr 0,nop,wscale 7], length 0 This connection finally times out. I've only access to the TG3 hardware in this configuration thus have only tested this with TG3 driver. There are a lot of other drivers that do not permit user changes to vlan acceleration features, and I don't know if they all suffere from a similar issue. The patch attempt to fix this another way. It moves the vlan header stipping code out of the vlan module and always builds it into the kernel network core. This way, even if vlan is not supported on a virtualizatoin host, the virtual machines running on top of such host will still work with VLANs enabled. CC: Patrick McHardy <kaber@xxxxxxxxx> CC: Nithin Nayak Sujir <nsujir@xxxxxxxxxxxx> CC: Michael Chan <mchan@xxxxxxxxxxxx> CC: Jiri Pirko <jiri@xxxxxxxxxxx> Signed-off-by: Vladislav Yasevich <vyasevic@xxxxxxxxxx> Acked-by: Jiri Pirko <jiri@xxxxxxxxxxx> Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx> Signed-off-by: Jiri Slaby <jslaby@xxxxxxx> --- include/linux/if_vlan.h | 6 ------ include/linux/skbuff.h | 1 + net/8021q/vlan_core.c | 53 ------------------------------------------------- net/bridge/br_vlan.c | 2 +- net/core/dev.c | 2 +- net/core/netpoll.c | 2 +- net/core/skbuff.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 57 insertions(+), 62 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 715c343f7c00..0bd3943d759f 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -90,7 +90,6 @@ extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern bool vlan_do_receive(struct sk_buff **skb); -extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); @@ -126,11 +125,6 @@ static inline bool vlan_do_receive(struct sk_buff **skb) return false; } -static inline struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - return skb; -} - static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { return 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9995165ff3d0..2960dab8b6fc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2394,6 +2394,7 @@ extern struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); +struct sk_buff *skb_vlan_untag(struct sk_buff *skb); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 7e57135c7cc4..5d56e05d83dd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -106,59 +106,6 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); -static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) -{ - if (skb_cow(skb, skb_headroom(skb)) < 0) { - kfree_skb(skb); - return NULL; - } - - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); - skb->mac_header += VLAN_HLEN; - return skb; -} - -struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - struct vlan_hdr *vhdr; - u16 vlan_tci; - - if (unlikely(vlan_tx_tag_present(skb))) { - /* vlan_tci is already set-up so leave this for another time */ - return skb; - } - - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(!skb)) - goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) - goto err_free; - - vhdr = (struct vlan_hdr *) skb->data; - vlan_tci = ntohs(vhdr->h_vlan_TCI); - __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); - - skb_pull_rcsum(skb, VLAN_HLEN); - vlan_set_encap_proto(skb, vhdr); - - skb = vlan_reorder_header(skb); - if (unlikely(!skb)) - goto err_free; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_reset_mac_len(skb); - - return skb; - -err_free: - kfree_skb(skb); - return NULL; -} -EXPORT_SYMBOL(vlan_untag); - - /* * vlan info and vid list */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 12ce54c0e8ed..da186c2ba036 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -136,7 +136,7 @@ static struct sk_buff *br_vlan_untag(struct sk_buff *skb) } skb->vlan_tci = 0; - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (skb) skb->vlan_tci = 0; diff --git a/net/core/dev.c b/net/core/dev.c index ef2f239cc322..4b1f8d02c68f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3517,7 +3517,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto unlock; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9b40f234b802..9d42f3bc5ee9 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -785,7 +785,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) } if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto out; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 174ebd563868..a8cf33868f9c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -62,6 +62,7 @@ #include <linux/scatterlist.h> #include <linux/errqueue.h> #include <linux/prefetch.h> +#include <linux/if_vlan.h> #include <net/protocol.h> #include <net/dst.h> @@ -3552,3 +3553,55 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) return shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); + +static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) +{ + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); + return NULL; + } + + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + return skb; +} + +struct sk_buff *skb_vlan_untag(struct sk_buff *skb) +{ + struct vlan_hdr *vhdr; + u16 vlan_tci; + + if (unlikely(vlan_tx_tag_present(skb))) { + /* vlan_tci is already set-up so leave this for another time */ + return skb; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto err_free; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + goto err_free; + + vhdr = (struct vlan_hdr *)skb->data; + vlan_tci = ntohs(vhdr->h_vlan_TCI); + __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); + + skb_pull_rcsum(skb, VLAN_HLEN); + vlan_set_encap_proto(skb, vhdr); + + skb = skb_reorder_vlan_header(skb); + if (unlikely(!skb)) + goto err_free; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} +EXPORT_SYMBOL(skb_vlan_untag); -- 2.1.1 -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html