This patch implement build XDP buffers in vhost_net. The idea is do userspace copy in vhost_net and build XDP buff based on the page. Vhost_net can then submit one or an array of XDP buffs to underlayer socket (e.g TUN). TUN can choose to do XDP or call build_skb() to build skb. To support build skb, vnet header were also stored into the header of the XDP buff. This userspace copy and XDP buffs building is key to achieve XDP batching in TUN, since TUN does not need to care about userspace copy and then can disable premmption for several XDP buffs to achieve batching from XDP. TODO: reserve headroom based on the TUN XDP. Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx> --- drivers/vhost/net.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f0639d7..1209e84 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -492,6 +492,80 @@ static bool vhost_has_more_pkts(struct vhost_net *net, likely(!vhost_exceeds_maxpend(net)); } +#define VHOST_NET_HEADROOM 256 +#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) + +static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, + struct iov_iter *from, + struct xdp_buff *xdp) +{ + struct vhost_virtqueue *vq = &nvq->vq; + struct page_frag *alloc_frag = ¤t->task_frag; + struct virtio_net_hdr *gso; + size_t len = iov_iter_count(from); + int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + int pad = SKB_DATA_ALIGN(VHOST_NET_RX_PAD + VHOST_NET_HEADROOM + + nvq->sock_hlen); + int sock_hlen = nvq->sock_hlen; + void *buf; + int copied; + + if (len < nvq->sock_hlen) + return -EFAULT; + + if (SKB_DATA_ALIGN(len + pad) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) + return -ENOSPC; + + buflen += SKB_DATA_ALIGN(len + pad); + alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); + if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) + return -ENOMEM; + + buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; + + /* We store two kinds of metadata in the header which will be + * used for XDP_PASS to do build_skb(): + * offset 0: buflen + * offset sizeof(int): vnet header + */ + copied = copy_page_from_iter(alloc_frag->page, + alloc_frag->offset + sizeof(int), sock_hlen, from); + if (copied != sock_hlen) + return -EFAULT; + + gso = (struct virtio_net_hdr *)(buf + sizeof(int)); + + if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + vhost16_to_cpu(vq, gso->csum_start) + + vhost16_to_cpu(vq, gso->csum_offset) + 2 > + vhost16_to_cpu(vq, gso->hdr_len)) { + gso->hdr_len = cpu_to_vhost16(vq, + vhost16_to_cpu(vq, gso->csum_start) + + vhost16_to_cpu(vq, gso->csum_offset) + 2); + + if (vhost16_to_cpu(vq, gso->hdr_len) > len) + return -EINVAL; + } + + len -= sock_hlen; + copied = copy_page_from_iter(alloc_frag->page, + alloc_frag->offset + pad, + len, from); + if (copied != len) + return -EFAULT; + + xdp->data_hard_start = buf; + xdp->data = buf + pad; + xdp->data_end = xdp->data + len; + *(int *)(xdp->data_hard_start)= buflen; + + get_page(alloc_frag->page); + alloc_frag->offset += buflen; + + return 0; +} + static void handle_tx_copy(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; -- 2.7.4