For small packets data copy was observed to take up about 15% CPU time. Switch to build_skb and avoid the copy when using mergeable rx buffers. As a bonus, medium-size skbs that fit in a page will be completely linear. Of course, we now need to lower the lower bound on packet size, to make sure a sane number of skbs fits in rx socket buffer. By how much? I don't know yet. It might also be useful to prefetch the packet buffer since net stack will likely use it soon. Lightly tested, in particular, I didn't yet test what this actually does to performance - sending this out for early feedback/flames. TODO: it appears that Linux won't handle correctly the case of first buffer being very small (or consisting exclusively of virtio header). This is already the case for current code, so don't bother testing yet. Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> --- drivers/net/virtio_net.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 767ab11..20f8dda 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -347,6 +347,26 @@ err: return NULL; } +#define VNET_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#define VNET_SKB_BUG (VNET_SKB_PAD < sizeof(struct virtio_net_hdr_mrg_rxbuf)) +#define VNET_SKB_LEN(len) ((len) - sizeof(struct virtio_net_hdr_mrg_rxbuf)) +#define VNET_SKB_OFF VNET_SKB_LEN(VNET_SKB_PAD) + +static struct sk_buff *vnet_build_skb(struct virtnet_info *vi, + void *buf, + unsigned int len, unsigned int truesize) +{ + struct sk_buff *skb = build_skb(buf, truesize); + + if (!skb) + return NULL; + + skb_reserve(skb, VNET_SKB_PAD); + skb_put(skb, VNET_SKB_LEN(len)); + + return skb; +} + static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, @@ -354,14 +374,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, unsigned int len) { void *buf = mergeable_ctx_to_buf_address(ctx); - struct virtio_net_hdr_mrg_rxbuf *hdr = buf; + struct virtio_net_hdr_mrg_rxbuf *hdr = buf + VNET_SKB_OFF; u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); - int offset = buf - page_address(page); unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx)); + int offset; - struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len, - truesize); + struct sk_buff *head_skb = vnet_build_skb(vi, buf, len, truesize); struct sk_buff *curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -606,11 +625,14 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len) { - const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + unsigned int hdr; unsigned int len; - len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), - GOOD_PACKET_LEN, PAGE_SIZE - hdr_len); + hdr = ALIGN(VNET_SKB_PAD + sizeof(struct skb_shared_info), + MERGEABLE_BUFFER_ALIGN); + + len = hdr + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), + 500 /* TODO */, PAGE_SIZE - hdr); return ALIGN(len, MERGEABLE_BUFFER_ALIGN); } @@ -626,7 +648,10 @@ static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp))) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; + BUILD_BUG_ON(VNET_SKB_BUG); + + buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset + + VNET_SKB_OFF; ctx = mergeable_buf_to_ctx(buf, len); get_page(alloc_frag->page); alloc_frag->offset += len; @@ -641,7 +666,8 @@ static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp) alloc_frag->offset += hole; } - sg_init_one(rq->sg, buf, len); + sg_init_one(rq->sg, buf, + len - VNET_SKB_OFF - sizeof(struct skb_shared_info)); err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp); if (err < 0) put_page(virt_to_head_page(buf)); -- MST _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization