Re: [PATCH 01/10] core: Split out UFO6 support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/18/2014 12:35 PM, Michael S. Tsirkin wrote:
> On Thu, Dec 18, 2014 at 10:01:35AM -0500, Vlad Yasevich wrote:
>> On 12/18/2014 02:54 AM, Michael S. Tsirkin wrote:
>>> Cc Dave, pls remember to do this next time otherwise
>>> your patches won't get merged :)
>>>
>>> On Wed, Dec 17, 2014 at 06:31:50PM -0500, Vlad Yasevich wrote:
>>>> On 12/17/2014 05:45 PM, Michael S. Tsirkin wrote:
>>>>> On Wed, Dec 17, 2014 at 01:20:46PM -0500, Vladislav Yasevich wrote:
>>>>>> Split IPv6 support for UFO into its own feature similiar to TSO.
>>>>>> This will later allow us to re-enable UFO support for virtio-net
>>>>>> devices.
>>>>>>
>>>>>> Signed-off-by: Vladislav Yasevich <vyasevic@xxxxxxxxxx>
>>>>>> ---
>>>>>>  include/linux/netdev_features.h |  7 +++++--
>>>>>>  include/linux/netdevice.h       |  1 +
>>>>>>  include/linux/skbuff.h          |  1 +
>>>>>>  net/core/dev.c                  | 35 +++++++++++++++++++----------------
>>>>>>  net/core/ethtool.c              |  2 +-
>>>>>>  5 files changed, 27 insertions(+), 19 deletions(-)
>>>>>>
>>>>>> diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
>>>>>> index dcfdecb..a078945 100644
>>>>>> --- a/include/linux/netdev_features.h
>>>>>> +++ b/include/linux/netdev_features.h
>>>>>> @@ -48,8 +48,9 @@ enum {
>>>>>>  	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
>>>>>>  	NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
>>>>>>  	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
>>>>>> +	NETIF_F_UFO6_BIT,		/* ... UDPv6 fragmentation */
>>>>>>  	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
>>>>>> -		NETIF_F_GSO_MPLS_BIT,
>>>>>> +		NETIF_F_UFO6_BIT,
>>>>>>  
>>>>>>  	NETIF_F_FCOE_CRC_BIT,		/* FCoE CRC32 */
>>>>>>  	NETIF_F_SCTP_CSUM_BIT,		/* SCTP checksum offload */
>>>>>> @@ -109,6 +110,7 @@ enum {
>>>>>>  #define NETIF_F_TSO_ECN		__NETIF_F(TSO_ECN)
>>>>>>  #define NETIF_F_TSO		__NETIF_F(TSO)
>>>>>>  #define NETIF_F_UFO		__NETIF_F(UFO)
>>>>>> +#define NETIF_F_UFO6		__NETIF_F(UFO6)
>>>>>>  #define NETIF_F_VLAN_CHALLENGED	__NETIF_F(VLAN_CHALLENGED)
>>>>>>  #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
>>>>>>  #define NETIF_F_RXALL		__NETIF_F(RXALL)
>>>>>> @@ -141,7 +143,7 @@ enum {
>>>>>>  
>>>>>>  /* List of features with software fallbacks. */
>>>>>>  #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
>>>>>> -				 NETIF_F_TSO6 | NETIF_F_UFO)
>>>>>> +				 NETIF_F_TSO6 | NETIF_F_UFO | NETIF_F_UFO6)
>>>>>>  
>>>>>>  #define NETIF_F_GEN_CSUM	NETIF_F_HW_CSUM
>>>>>>  #define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
>>>>>> @@ -149,6 +151,7 @@ enum {
>>>>>>  #define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
>>>>>>  
>>>>>>  #define NETIF_F_ALL_TSO 	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
>>>>>> +#define NETIF_F_ALL_UFO		(NETIF_F_UFO | NETIF_F_UFO6)
>>>>>>  
>>>>>>  #define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
>>>>>>  				 NETIF_F_FSO)
>>>>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>>>>> index 74fd5d3..86af10a 100644
>>>>>> --- a/include/linux/netdevice.h
>>>>>> +++ b/include/linux/netdevice.h
>>>>>> @@ -3559,6 +3559,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
>>>>>>  	/* check flags correspondence */
>>>>>>  	BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
>>>>>>  	BUILD_BUG_ON(SKB_GSO_UDP     != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
>>>>>> +	BUILD_BUG_ON(SKB_GSO_UDP6    != (NETIF_F_UFO6 >> NETIF_F_GSO_SHIFT));
>>>>>>  	BUILD_BUG_ON(SKB_GSO_DODGY   != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
>>>>>>  	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
>>>>>>  	BUILD_BUG_ON(SKB_GSO_TCPV6   != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
>>>>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>>>>>> index 6c8b6f6..8538b67 100644
>>>>>> --- a/include/linux/skbuff.h
>>>>>> +++ b/include/linux/skbuff.h
>>>>>> @@ -372,6 +372,7 @@ enum {
>>>>>>  
>>>>>>  	SKB_GSO_MPLS = 1 << 12,
>>>>>>  
>>>>>> +	SKB_GSO_UDP6 = 1 << 13
>>>>>>  };
>>>>>>  
>>>>>>  #if BITS_PER_LONG > 32
>>>>>
>>>>> So this implies anything getting GSO packets e.g.
>>>>> from userspace now needs to check IP version to
>>>>> set GSO type correctly.
>>>>>
>>>>> I think you missed some places that do this, e.g. af_packet
>>>>> sockets.
>>>>>
>>>>
>>>> I looked at af_packet sockets and they set this only in the event
>>>> vnet header has been used with a GSO type.  In this case, the user
>>>> already knows the the type.
>>>
>>> Imagine you are receiving a packet:
>>>
>>>                 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
>>>                         switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
>>>                         case VIRTIO_NET_HDR_GSO_TCPV4:
>>>                                 gso_type = SKB_GSO_TCPV4;
>>>                                 break;
>>>                         case VIRTIO_NET_HDR_GSO_TCPV6:
>>>                                 gso_type = SKB_GSO_TCPV6;
>>>                                 break;
>>>                         case VIRTIO_NET_HDR_GSO_UDP:
>>>                                 gso_type = SKB_GSO_UDP;
>>>                                 break;
>>>                         default:
>>>                                 goto out_unlock;
>>>                         }
>>>
>>>                         if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
>>>                                 gso_type |= SKB_GSO_TCP_ECN;
>>>
>>>                         if (vnet_hdr.gso_size == 0)
>>>                                 goto out_unlock;
>>>
>>>                 }
>>>
>>> we used to report UFO6 as SKB_GSO_UDP, we probably
>>> should keep doing this, with your patch we select the
>>> goto out_unlock path.
>>>
>>>
>>
>> No.  The vnet_hdr.gso_type is still going to be VIRTIO_NET_HDR_GSO_UDP
>> since the UDP6 version isn't defined yet.  So, it will be marked as
>> GSO_UDP.
> 
> I pasted the wrong snippet.
> I meant this:
> 
>                         /* This is a hint as to how much should be * linear. */
>                         vnet_hdr.hdr_len = __cpu_to_virtio16(false, skb_headlen(skb));
>                         vnet_hdr.gso_size = __cpu_to_virtio16(false, sinfo->gso_size);
>                         if (sinfo->gso_type & SKB_GSO_TCPV4)
>                                 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
>                         else if (sinfo->gso_type & SKB_GSO_TCPV6)
>                                 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
>                         else if (sinfo->gso_type & SKB_GSO_UDP)
>                                 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
>                         else if (sinfo->gso_type & SKB_GSO_FCOE)
>                                 goto out_free;
>                         else
>                                 BUG();
> 
> so if we get SKB_GSO_UDP we'll get BUG().
> 
> 
> 
>> This code most likely needs the same workaround as exists in tap and macvtap,
>> i.e select the proxy fragment id and decide what to do with gso_type.
> 
> And fixup type to GSO_UDP6 while we are at it.
> 
>>>
>>>> It is true that with this series af_packets now can't do IPv6 UFO
>>>> since there is no VIRTIO_NET_HDR_GSO_UDPV6 yet.
>>>
>>> What do you mean by "do".
>>
>> What I mean is that AF_PACKET sockets currently do not do IPv6 UFO
>> correctly, even after Ben's fixes to tap/macvtap.  There is no
>> proxy fragment id selection in af_packet case and we have the
>> same problem Ben was trying address for tap/macvtap.
>>> Are we talking about sending or receiving packets?
>>
>> I am talking about sending, see above.
>>
>>> You seem to conflate the two.
>>>
>>> We always discarded ID on RX.
>>>
>>> For tun, this is xmit, so just by saying "this device can
>>> not do UFO" you will never get short packets.
>>>
>>
>> You must mean long packets.
> 
> Yes.
> 
>> This is actually an issue I've been thinking
>> about. With with your suggestion of switching the GSO type for legacy
>> applications we end up with fragments for IPv6 traffic.   As a result,
>> legacy VMs will see a regression for large IPv6 datagrams.
> 
> I'm not sure what's meant by my suggestion here :)

What I am referring to here is to change the gso_type to UDPV6 in tap/macvtap
when passing packet to the host.

> It seems clear that legacy applications don't want to get IPv6
> fragment IDs in virtio header. Either we pass them plain ethernet
> packets or assume they are ok with discarding the IDs even
> if we set GSO_UDP.

So, I am not try to pass fragment IDs yet.  I am trying to make sure that
older gueest that assume that UFO == UFO4 + UFO6 continue to work and do not
regress.  At the same time, I want to enable UFO(4) for new guests.

If we set gso_type to SKB_GSO_UDPV6 before passing the data to the forwarding
path of the host, then this will cause IPv6 fragmentation.  If we leave gso_type
as SKB_GSO_UDP, then older VMs will continue to communicate using large UDP
data packets.

Later when VMs support UFO6, when UFO6 is enabled, the fragment id can be communicated.
But that's later.

> 
>>>
>>>>
>>>> I suppose we could do something similar there as we do in tun code/macvtap code.
>>>> If that's the case, it currently broken as well.
>>>>
>>>> -vlad
>>>
>>>
>>> Broken is a big word.
>>>
>>> Let's stop conflating two directions.
>>
>> I am not and was talking only about af_packet as that is what you asked about.
>> There is no tun/macvtap in play here.  They are handled separately in their
>> respective drivers.
>>
>>>
>>> Here's the way I look at it:
>>>
>>> 1. Userspace doesn't have a way to get fragment IDs
>>> from tun/macvtap/packet sockets.
>>> Presumably, not all users need these IDs.
>>> E.g. old guests clearly don't.
>>>
>>> We should either give them packets stripping the ID,
>>> like we always did, or make sure they never get these packets.
>>> Second option seems achievable for tun if we
>>> never tell linux it can do UFO, but I don't see
>>> how to do it for macvtap and packet socket.
>>>
>>
>> macvtap is slightly problematic, but doable with some tricks.
>> packet socket is an interesting problem.  The only way
>> an af_packet socket can receive an skb marked SKB_GSO_UDPV6
>> is if someone else on the host sent it (like another guest).
> 
> Or if a NIC set it.

OK, virtio seems to be the only nic doing this... You are right, I
need to cover that scenario.

-vlad

> 
>> Since there is are no feature or vnet header length negotiations,
>> it is impossible to tell if an application using this af_packet
>> socket is capable of processing VIRTIO_NET_HDR_GSO_UDPV6
>> type (yet to be defined).
>>
>> So, we can either use existing VIRTIO_NET_HDR_GSO_UDP on receive
>> path, add some kind of negotiation logic to packet socket (like
>> storing the application expected vnet header size), or perform
>> IPv6 fragmentation somehow.
>>
>> Options 1 or 2 are doable.
> 
> 1 is using VIRTIO_NET_HDR_GSO_UDP and discarding ID,
> 2 is "some kind of negotiation logic"?
> 2 can't be enough, you will need 1 as well.
> 
> So let's start with 1 as a first step.
> 
> 
> 
> 
>>>
>>> 2. Userspace doesn't have a way to set fragment IDs
>>> for tun/macvtap/packet sockets.
>>> Presumably, not all users have these IDs.  E.g. old
>>> guests clearly don't.
>>>
>>> We should either generate our own ID,
>>> like we always did, or make sure we don't accept
>>> these packets.
>>> Second option is almost sure to break userspace,
>>> so it seems we must do the first one.
>>>
>>
>> Right.  This was missing from packet sockets.  I can fix it.
>>
>> -vlad
> 
> Also, this can't be a patch on top, since we don't
> want bisect to give us configurations which
> can BUG().
> 
> 
>>>
>>> 3.
>>> Exactly the same two things if you replace userspace
>>> with hypervisor and tun/macvtap/packet socket with
>>> virtio device.
>>>
>>>
>>> 4. 
>>> As a next step, we should add a way for userspace
>>> to tell us that it has ids and can handle them.
>>>
>>>
>>>
>>>
>>>
>>>>>
>>>>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>>>>> index 945bbd0..fa4d2ee 100644
>>>>>> --- a/net/core/dev.c
>>>>>> +++ b/net/core/dev.c
>>>>>> @@ -5929,6 +5929,12 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
>>>>>>  		features &= ~NETIF_F_ALL_TSO;
>>>>>>  	}
>>>>>>  
>>>>>> +	/* UFO requires that SG is present as well */
>>>>>> +	if ((features & NETIF_F_ALL_UFO) && !(features & NETIF_F_SG)) {
>>>>>> +		netdev_dbg(dev, "Dropping UFO features since no SG feature.\n");
>>>>>> +		features &= ~NETIF_F_ALL_UFO;
>>>>>> +	}
>>>>>> +
>>>>>>  	if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
>>>>>>  					!(features & NETIF_F_IP_CSUM)) {
>>>>>>  		netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
>>>>>> @@ -5952,24 +5958,21 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
>>>>>>  		features &= ~NETIF_F_GSO;
>>>>>>  	}
>>>>>>  
>>>>>> -	/* UFO needs SG and checksumming */
>>>>>> -	if (features & NETIF_F_UFO) {
>>>>>> -		/* maybe split UFO into V4 and V6? */
>>>>>> -		if (!((features & NETIF_F_GEN_CSUM) ||
>>>>>> -		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
>>>>>> -			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
>>>>>> -			netdev_dbg(dev,
>>>>>> -				"Dropping NETIF_F_UFO since no checksum offload features.\n");
>>>>>> -			features &= ~NETIF_F_UFO;
>>>>>> -		}
>>>>>> -
>>>>>> -		if (!(features & NETIF_F_SG)) {
>>>>>> -			netdev_dbg(dev,
>>>>>> -				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
>>>>>> -			features &= ~NETIF_F_UFO;
>>>>>> -		}
>>>>>> +	/* UFO also needs checksumming */
>>>>>> +	if ((features & NETIF_F_UFO) && !(features & NETIF_F_GEN_CSUM) &&
>>>>>> +					!(features & NETIF_F_IP_CSUM)) {
>>>>>> +		netdev_dbg(dev,
>>>>>> +			   "Dropping NETIF_F_UFO since no checksum offload features.\n");
>>>>>> +		features &= ~NETIF_F_UFO;
>>>>>> +	}
>>>>>> +	if ((features & NETIF_F_UFO6) && !(features & NETIF_F_GEN_CSUM) &&
>>>>>> +					 !(features & NETIF_F_IPV6_CSUM)) {
>>>>>> +		netdev_dbg(dev,
>>>>>> +			   "Dropping NETIF_F_UFO6 since no checksum offload features.\n");
>>>>>> +		features &= ~NETIF_F_UFO6;
>>>>>>  	}
>>>>>>  
>>>>>> +
>>>>>>  #ifdef CONFIG_NET_RX_BUSY_POLL
>>>>>>  	if (dev->netdev_ops->ndo_busy_poll)
>>>>>>  		features |= NETIF_F_BUSY_POLL;
>>>>>> diff --git a/net/core/ethtool.c b/net/core/ethtool.c
>>>>>> index 06dfb29..93eff41 100644
>>>>>> --- a/net/core/ethtool.c
>>>>>> +++ b/net/core/ethtool.c
>>>>>> @@ -223,7 +223,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
>>>>>>  		return NETIF_F_ALL_TSO;
>>>>>>  	case ETHTOOL_GUFO:
>>>>>>  	case ETHTOOL_SUFO:
>>>>>> -		return NETIF_F_UFO;
>>>>>> +		return NETIF_F_ALL_UFO;
>>>>>>  	case ETHTOOL_GGSO:
>>>>>>  	case ETHTOOL_SGSO:
>>>>>>  		return NETIF_F_GSO;
>>>>>> -- 
>>>>>> 1.9.3

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization




[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux