Currently, the VIRTIO_NET_F_GUEST_CSUM (corresponds to NETIF_F_RXCSUM for netdev) feature of the virtio-net driver conflicts with the loading of XDP, which is caused by the problem described in [1][2], that is, XDP may cause errors in partial csumed-related fields which can lead to packet dropping. In addition, when communicating between vm and vm on the same host, the receiving side vm will receive packets marked as VIRTIO_NET_HDR_F_NEEDS_CSUM, but after these packets are processed by XDP, the VIRTIO_NET_HDR_F_NEEDS_CSUM and skb CHECKSUM_PARTIAL flags will be cleared, causing the packet dropping. This patch introduces a helper: 1. It will try to solve the above problems in the subsequent patch. 2. It parses UDP/TCP and calculates the pseudo-header checksum for virtio-net. virtio-net currently does not resolve VLANs nor SCTP CRC checksum offloading. [1] commit 18ba58e1c234 ("virtio-net: fail XDP set if guest csum is negotiated") [2] commit e59ff2c49ae1 ("virtio-net: disable guest csum during XDP set") Signed-off-by: Heng Qi <hengqi@xxxxxxxxxxxxxxxxx> Reviewed-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> --- drivers/net/virtio_net.c | 136 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5a7f7a76b920..83ab9257043a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -22,6 +22,7 @@ #include <net/route.h> #include <net/xdp.h> #include <net/net_failover.h> +#include <net/ip6_checksum.h> static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); @@ -1568,6 +1569,141 @@ static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); } +static int virtnet_flow_dissect_udp_tcp(struct virtnet_info *vi, struct sk_buff *skb) +{ + struct net_device *dev = vi->dev; + struct flow_keys_basic keys; + struct udphdr *uh; + struct tcphdr *th; + int len, offset; + + /* The flow dissector needs this information. */ + skb->dev = dev; + skb_reset_mac_header(skb); + skb->protocol = dev_parse_header_protocol(skb); + /* virtio-net does not need to resolve VLAN. */ + skb_set_network_header(skb, ETH_HLEN); + if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) + return -EINVAL; + + /* 1. Pseudo-header checksum calculation requires: + * (1) saddr/daddr (2) IP_PROTO (3) length of transport payload + * 2. We don't parse SCTP because virtio-net currently doesn't + * support CRC checksum offloading for SCTP. + */ + if (keys.basic.n_proto == htons(ETH_P_IP)) { + struct iphdr *iph; + + /* Flow dissector has verified that there is an IP header. + * So we do not need a pskb_may_pull(). + */ + iph = ip_hdr(skb); + if (iph->version != 4) + return -EINVAL; + + skb->transport_header = skb->mac_header + keys.control.thoff; + offset = skb_transport_offset(skb); + len = skb->len - offset; + if (keys.basic.ip_proto == IPPROTO_UDP) { + if (!pskb_may_pull(skb, offset + sizeof(struct udphdr))) + return -EINVAL; + + uh = udp_hdr(skb); + skb->csum_offset = offsetof(struct udphdr, check); + /* Although uh->len is already the 3rd parameter for the calculation + * of the pseudo-header checksum, we have already calculated the + * length of the transport layer, so use 'len' here directly. + */ + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, len, + IPPROTO_UDP, 0); + } else if (keys.basic.ip_proto == IPPROTO_TCP) { + if (!pskb_may_pull(skb, offset + sizeof(struct tcphdr))) + return -EINVAL; + + th = tcp_hdr(skb); + skb->csum_offset = offsetof(struct tcphdr, check); + th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, len, + IPPROTO_TCP, 0); + } /* virtio-net doesn't support checksums for SCTP crc hw offloading.*/ + } else if (keys.basic.n_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h; + + ip6h = ipv6_hdr(skb); + if (ip6h->version != 6) + return -EINVAL; + + /* We have skipped the possible extension headers for IPv6. + * If there is a Routing Header, the tx's check value is calculated by + * final_dst, and that value is the rx's daddr. + */ + skb->transport_header = skb->mac_header + keys.control.thoff; + offset = skb_transport_offset(skb); + len = skb->len - offset; + if (keys.basic.ip_proto == IPPROTO_UDP) { + if (!pskb_may_pull(skb, offset + sizeof(struct udphdr))) + return -EINVAL; + + uh = udp_hdr(skb); + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_ipv6_magic((const struct in6_addr *)&ip6h->saddr, + (const struct in6_addr *)&ip6h->daddr, + len, IPPROTO_UDP, 0); + } else if (keys.basic.ip_proto == IPPROTO_TCP) { + if (!pskb_may_pull(skb, offset + sizeof(struct tcphdr))) + return -EINVAL; + + th = tcp_hdr(skb); + skb->csum_offset = offsetof(struct tcphdr, check); + th->check = ~csum_ipv6_magic((const struct in6_addr *)&ip6h->saddr, + (const struct in6_addr *)&ip6h->daddr, + len, IPPROTO_TCP, 0); + } + } + + skb->csum_start = skb->transport_header; + + return 0; +} + +static int virtnet_set_csum_after_xdp(struct virtnet_info *vi, + struct sk_buff *skb, + __u8 flags) +{ + int err; + + /* When XDP program is loaded, for example, the vm-vm scenario + * on the same host, packets marked as VIRTIO_NET_HDR_F_NEEDS_CSUM + * will travel. Although these packets are safe from the point of + * view of the vm, to avoid modification by XDP and successful + * forwarding in the upper layer, we re-probe the necessary checksum + * related information: skb->csum_{start, offset}, pseudo-header csum. + * + * This benefits us: + * 1. XDP can be loaded when there's _F_GUEST_CSUM. + * 2. The device verifies the checksum of packets , especially + * benefiting for large packets. + * 3. In the same-host vm-vm scenario, packets marked as + * VIRTIO_NET_HDR_F_NEEDS_CSUM are no longer dropped after being + * processed by XDP. + */ + if (flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + err = virtnet_flow_dissect_udp_tcp(vi, skb); + if (err < 0) + return -EINVAL; + + skb->ip_summed = CHECKSUM_PARTIAL; + } else if (flags & VIRTIO_NET_HDR_F_DATA_VALID) { + /* We want to benefit from this: XDP guarantees that packets marked + * as VIRTIO_NET_HDR_F_DATA_VALID still have correct csum after they + * are processed. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + return 0; +} + static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf, unsigned int len, void **ctx, unsigned int *xdp_xmit, -- 2.19.1.6.gb485710b