On 12/17/2013 08:16 AM, Michael Dalton wrote:
> The virtio-net driver currently uses netdev_alloc_frag() for GFP_ATOMIC
> mergeable rx buffer allocations. This commit migrates virtio-net to use
> per-receive queue page frags for GFP_ATOMIC allocation. This change
> unifies mergeable rx buffer memory allocation, which will now use
> skb_page_frag_refill() for both atomic and GFP-WAIT buffer allocations.
>
> To address fragmentation concerns, if after buffer allocation there
> is too little space left in the page frag to allocate a subsequent
> buffer, the remaining space is added to the current allocated buffer
> so that the remaining space can be used to store packet data.
>
> Signed-off-by: Michael Dalton <mwdalton@xxxxxxxxxx>
> ---
>  drivers/net/virtio_net.c | 69 ++++++++++++++++++++++++++----------------------
>  1 file changed, 38 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index c51a988..d38d130 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -78,6 +78,9 @@ struct receive_queue {
>      /* Chain pages by the private ptr. */
>      struct page *pages;
>
> +    /* Page frag for GFP_ATOMIC packet buffer allocation. */
> +    struct page_frag atomic_frag;
> +
>      /* RX: fragments + linear part + virtio header */
>      struct scatterlist sg[MAX_SKB_FRAGS + 2];
>
> @@ -127,9 +130,9 @@ struct virtnet_info {
>      struct mutex config_lock;
>
>      /* Page_frag for GFP_KERNEL packet buffer allocation when we run
> -     * low on memory.
> +     * low on memory. May sleep.
>       */
> -    struct page_frag alloc_frag;
> +    struct page_frag sleep_frag;

Any reason to use two different page_frags, considering that only
skb_page_frag_refill() is used for both?

>
>      /* Does the affinity hint is set for virtqueues? */
>      bool affinity_hint_set;
> @@ -336,8 +339,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>      int num_buf = hdr->mhdr.num_buffers;
>      struct page *page = virt_to_head_page(buf);
>      int offset = buf - page_address(page);
> -    struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
> -                                           MERGE_BUFFER_LEN);
> +    int truesize = max_t(int, len, MERGE_BUFFER_LEN);
> +    struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
>      struct sk_buff *curr_skb = head_skb;
>
>      if (unlikely(!curr_skb))
> @@ -353,11 +356,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>              dev->stats.rx_length_errors++;
>              goto err_buf;
>          }
> -        if (unlikely(len > MERGE_BUFFER_LEN)) {
> -            pr_debug("%s: rx error: merge buffer too long\n",
> -                     dev->name);
> -            len = MERGE_BUFFER_LEN;
> -        }
>
>          page = virt_to_head_page(buf);
>          --rq->num;
> @@ -376,19 +374,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>              head_skb->truesize += nskb->truesize;
>              num_skb_frags = 0;
>          }
> +        truesize = max_t(int, len, MERGE_BUFFER_LEN);
>          if (curr_skb != head_skb) {
>              head_skb->data_len += len;
>              head_skb->len += len;
> -            head_skb->truesize += MERGE_BUFFER_LEN;
> +            head_skb->truesize += truesize;
>          }
>          offset = buf - page_address(page);
>          if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
>              put_page(page);
>              skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
> -                                 len, MERGE_BUFFER_LEN);
> +                                 len, truesize);
>          } else {
>              skb_add_rx_frag(curr_skb, num_skb_frags, page,
> -                            offset, len, MERGE_BUFFER_LEN);
> +                            offset, len, truesize);
>          }
>      }
>
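An aside for readers following along: len can now legitimately exceed
MERGE_BUFFER_LEN, because add_recvbuf_mergeable() below may fold the
leftover page tail into the last buffer it carves. That is why the
"merge buffer too long" check is removed and truesize becomes
max_t(int, len, MERGE_BUFFER_LEN). A standalone userspace model of the
accounting (the buffer lengths and MERGE_BUFFER_LEN value here are
made-up placeholders, not the driver's):

/* Userspace model of the truesize accounting in receive_mergeable();
 * sizes are placeholders, not kernel values. */
#include <stdio.h>

#define MERGE_BUFFER_LEN 1536 /* placeholder value */

int main(void)
{
        /* a short packet, an exact fit, and a buffer that had the
         * page tail folded into it by the allocator */
        int lens[] = { 60, MERGE_BUFFER_LEN, MERGE_BUFFER_LEN + 1024 };
        unsigned int i;

        for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
                /* every buffer consumed at least MERGE_BUFFER_LEN of
                 * the page; folded buffers consumed more */
                int truesize = lens[i] > MERGE_BUFFER_LEN ?
                               lens[i] : MERGE_BUFFER_LEN;
                printf("len=%d truesize=%d\n", lens[i], truesize);
        }
        return 0;
}
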
> @@ -579,24 +578,24 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
>  static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
>  {
>      struct virtnet_info *vi = rq->vq->vdev->priv;
> -    char *buf = NULL;
> -    int err;
> +    struct page_frag *alloc_frag;
> +    char *buf;
> +    int err, len, hole;
>
> -    if (gfp & __GFP_WAIT) {
> -        if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
> -                                 gfp)) {
> -            buf = (char *)page_address(vi->alloc_frag.page) +
> -                  vi->alloc_frag.offset;
> -            get_page(vi->alloc_frag.page);
> -            vi->alloc_frag.offset += MERGE_BUFFER_LEN;
> -        }
> -    } else {
> -        buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
> -    }
> -    if (!buf)
> +    alloc_frag = (gfp & __GFP_WAIT) ? &vi->sleep_frag : &rq->atomic_frag;
> +    if (unlikely(!skb_page_frag_refill(MERGE_BUFFER_LEN, alloc_frag, gfp)))
>          return -ENOMEM;
> +    buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
> +    get_page(alloc_frag->page);
> +    len = MERGE_BUFFER_LEN;
> +    alloc_frag->offset += len;
> +    hole = alloc_frag->size - alloc_frag->offset;
> +    if (hole < MERGE_BUFFER_LEN) {
> +        len += hole;
> +        alloc_frag->offset += hole;
> +    }
>
> -    sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
> +    sg_init_one(rq->sg, buf, len);

I wonder whether we could use get_a_page() and give_pages() to recycle
the pages as before, which may help performance. We can also do some
optimizations for this in vhost.

>      err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
>      if (err < 0)
>          put_page(virt_to_head_page(buf));
> @@ -1377,6 +1376,16 @@ static void free_receive_bufs(struct virtnet_info *vi)
>      }
>  }
>
> +static void free_receive_page_frags(struct virtnet_info *vi)
> +{
> +    int i;
> +    for (i = 0; i < vi->max_queue_pairs; i++)
> +        if (vi->rq[i].atomic_frag.page)
> +            put_page(vi->rq[i].atomic_frag.page);
> +    if (vi->sleep_frag.page)
> +        put_page(vi->sleep_frag.page);
> +}
> +
>  static void free_unused_bufs(struct virtnet_info *vi)
>  {
>      void *buf;
> @@ -1706,8 +1715,7 @@ free_recv_bufs:
>  free_vqs:
>      cancel_delayed_work_sync(&vi->refill);
>      virtnet_del_vqs(vi);
> -    if (vi->alloc_frag.page)
> -        put_page(vi->alloc_frag.page);
> +    free_receive_page_frags(vi);
>  free_stats:
>      free_percpu(vi->stats);
>  free:
> @@ -1741,8 +1749,7 @@ static void virtnet_remove(struct virtio_device *vdev)
>      unregister_netdev(vi->dev);
>
>      remove_vq_common(vi);
> -    if (vi->alloc_frag.page)
> -        put_page(vi->alloc_frag.page);
> +    free_receive_page_frags(vi);
>
>      flush_work(&vi->config_work);
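For anyone who wants to experiment with the carve-and-fold scheme
outside the kernel, here is a minimal userspace sketch. It is an
illustration only: frag_refill() stands in for skb_page_frag_refill(),
PAGE_SZ and MERGE_BUFFER_LEN are placeholder values, and the per-buffer
get_page()/put_page() refcounting the driver relies on is omitted.

/* Userspace sketch of the page-frag carving in add_recvbuf_mergeable().
 * frag_refill() stands in for skb_page_frag_refill(); sizes are
 * placeholders. Page refcounting is omitted, so old buffers must not
 * be used after a refill in this toy version. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SZ          4096 /* placeholder page size */
#define MERGE_BUFFER_LEN 1536 /* placeholder buffer size */

struct frag {
        char *page;   /* backing "page" */
        int   size;   /* bytes available in the page */
        int   offset; /* first free byte */
};

/* Get a fresh page when the current one cannot fit one more buffer. */
static int frag_refill(struct frag *f, int min)
{
        if (f->page && f->size - f->offset >= min)
                return 1;
        free(f->page);
        f->page = malloc(PAGE_SZ);
        f->size = PAGE_SZ;
        f->offset = 0;
        return f->page != NULL;
}

/* Carve one rx buffer; fold a too-small tail (the "hole") into it. */
static char *alloc_buf(struct frag *f, int *len)
{
        char *buf;
        int hole;

        if (!frag_refill(f, MERGE_BUFFER_LEN))
                return NULL;
        buf = f->page + f->offset;
        *len = MERGE_BUFFER_LEN;
        f->offset += *len;
        hole = f->size - f->offset;
        if (hole < MERGE_BUFFER_LEN) {
                *len += hole;      /* tail can't hold another buffer, */
                f->offset += hole; /* so give it to this one for data */
        }
        return buf;
}

int main(void)
{
        struct frag f = { NULL, 0, 0 };
        int len, i;

        for (i = 0; i < 4; i++)
                if (alloc_buf(&f, &len))
                        printf("buf %d: len=%d\n", i, len);
        free(f.page);
        return 0;
}

With these placeholder sizes the sketch carves two buffers per page,
and the second one absorbs the 1024-byte hole (len=2560), which is
exactly why receive_mergeable() above must accept and account for
len > MERGE_BUFFER_LEN.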