On Mon, Jun 13, 2016 at 05:47:09PM +0800, Liang Li wrote: > The implementation of the current virtio-balloon is not very efficient, > Bellow is test result of time spends on inflating the balloon to 3GB of > a 4GB idle guest: > > a. allocating pages (6.5%, 103ms) > b. sending PFNs to host (68.3%, 787ms) > c. address translation (6.1%, 96ms) > d. madvise (19%, 300ms) > > It takes about 1577ms for the whole inflating process to complete. The > test shows that the bottle neck is the stage b and stage d. > > If using a bitmap to send the page info instead of the PFNs, we can > reduce the overhead in stage b quite a lot. Furthermore, it's possible > to do the address translation and the madvise with a bulk of pages, > instead of the current page per page way, so the overhead of stage c > and stage d can also be reduced a lot. > > This patch is the kernel side implementation which is intended to speed > up the inflating & deflating process by adding a new feature to the > virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB > idle guest only takes 200ms, it's about 8 times as fast as before. > > TODO: optimize stage a by allocating/freeing a chunk of pages instead > of a single page at a time. > > Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx> > Suggested-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Cc: Cornelia Huck <cornelia.huck@xxxxxxxxxx> > Cc: Amit Shah <amit.shah@xxxxxxxxxx> Causes kbuild warnings > --- > drivers/virtio/virtio_balloon.c | 164 +++++++++++++++++++++++++++++++----- > include/uapi/linux/virtio_balloon.h | 1 + > 2 files changed, 144 insertions(+), 21 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 8d649a2..1fa601b 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -40,11 +40,19 @@ > #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 > #define OOM_VBALLOON_DEFAULT_PAGES 256 > #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 > +#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB */ 2<< 30 is 2G but that is not a useful comment. pls explain what is the reason for this selection. > > static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; > module_param(oom_pages, int, S_IRUSR | S_IWUSR); > MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); > > +struct balloon_bmap_hdr { > + __virtio32 id; > + __virtio32 page_shift; > + __virtio64 start_pfn; > + __virtio64 bmap_len; > +}; > + Put this in an uapi header please. > struct virtio_balloon { > struct virtio_device *vdev; > struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > @@ -62,6 +70,11 @@ struct virtio_balloon { > > /* Number of balloon pages we've told the Host we're not using. */ > unsigned int num_pages; > + /* Bitmap and length used to tell the host the pages */ > + unsigned long *page_bitmap; > + unsigned long bmap_len; > + /* Used to record the processed pfn range */ > + unsigned long min_pfn, max_pfn, start_pfn, end_pfn; > /* > * The pages we've told the Host we're not using are enqueued > * at vb_dev_info->pages list. > @@ -105,15 +118,51 @@ static void balloon_ack(struct virtqueue *vq) > wake_up(&vb->acked); > } > > +static inline void init_pfn_range(struct virtio_balloon *vb) > +{ > + vb->min_pfn = (1UL << 48); Where does this value come from? Do you want ULONG_MAX? This does not fit in long on 32 bit systems. > + vb->max_pfn = 0; > +} > + > +static inline void update_pfn_range(struct virtio_balloon *vb, > + struct page *page) > +{ > + unsigned long balloon_pfn = page_to_balloon_pfn(page); > + > + if (balloon_pfn < vb->min_pfn) > + vb->min_pfn = balloon_pfn; > + if (balloon_pfn > vb->max_pfn) > + vb->max_pfn = balloon_pfn; > +} > + > static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) > { > - struct scatterlist sg; > unsigned int len; > > - sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) { > + struct balloon_bmap_hdr hdr; why not init fields here? > + unsigned long bmap_len; and here > + struct scatterlist sg[2]; > + > + hdr.id = cpu_to_virtio32(vb->vdev, 0); > + hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT); > + hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn); > + bmap_len = min(vb->bmap_len, > + (vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE); > + hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len); > + sg_init_table(sg, 2); > + sg_set_buf(&sg[0], &hdr, sizeof(hdr)); > + sg_set_buf(&sg[1], vb->page_bitmap, bmap_len); > + virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL); might fail if queue size < 2. validate queue size and clear VIRTIO_BALLOON_F_PAGE_BITMAP? Alternatively, and I think preferably, use first struct balloon_bmap_hdr bytes in the buffer to pass the header to host. > + } else { > + struct scatterlist sg; > > - /* We should always be able to add one buffer to an empty queue. */ > - virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); > + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); > + /* We should always be able to add one buffer to an > + * empty queue. > + */ > + virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); > + } > virtqueue_kick(vq); > > /* When host has read buffer, this completes via balloon_ack */ > @@ -133,13 +182,50 @@ static void set_page_pfns(struct virtio_balloon *vb, > page_to_balloon_pfn(page) + i); > } > > -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) > +static void set_page_bitmap(struct virtio_balloon *vb, > + struct list_head *pages, struct virtqueue *vq) > +{ > + unsigned long pfn; > + struct page *page, *next; > + bool find; find -> found > + > + vb->min_pfn = rounddown(vb->min_pfn, BITS_PER_LONG); > + vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG); > + for (pfn = vb->min_pfn; pfn < vb->max_pfn; > + pfn += VIRTIO_BALLOON_PFNS_LIMIT) { > + vb->start_pfn = pfn; > + vb->end_pfn = pfn; > + memset(vb->page_bitmap, 0, vb->bmap_len); > + find = false; > + list_for_each_entry_safe(page, next, pages, lru) { Why _safe? > + unsigned long balloon_pfn = page_to_balloon_pfn(page); > + > + if (balloon_pfn < pfn || > + balloon_pfn >= pfn + VIRTIO_BALLOON_PFNS_LIMIT) > + continue; > + set_bit(balloon_pfn - pfn, vb->page_bitmap); > + if (balloon_pfn > vb->end_pfn) > + vb->end_pfn = balloon_pfn; > + find = true; maybe remove page from list? this way we won't go over same entry multiple times ... > + } > + if (find) { > + vb->end_pfn = roundup(vb->end_pfn, BITS_PER_LONG); > + tell_host(vb, vq); > + } > + } > +} > + > +static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num, > + bool use_bmap) > { > struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; > unsigned num_allocated_pages; > > - /* We can only do one array worth at a time. */ > - num = min(num, ARRAY_SIZE(vb->pfns)); > + if (use_bmap) > + init_pfn_range(vb); > + else > + /* We can only do one array worth at a time. */ > + num = min(num, ARRAY_SIZE(vb->pfns)); > > mutex_lock(&vb->balloon_lock); > for (vb->num_pfns = 0; vb->num_pfns < num; > @@ -154,7 +240,10 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) > msleep(200); > break; > } > - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > + if (use_bmap) > + update_pfn_range(vb, page); > + else > + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; > if (!virtio_has_feature(vb->vdev, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) > @@ -163,8 +252,13 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) > > num_allocated_pages = vb->num_pfns; > /* Did we get any? */ > - if (vb->num_pfns != 0) > - tell_host(vb, vb->inflate_vq); > + if (vb->num_pfns != 0) { > + if (use_bmap) > + set_page_bitmap(vb, &vb_dev_info->pages, > + vb->inflate_vq); don't we need pages_lock if we access vb_dev_info->pages? > + else > + tell_host(vb, vb->inflate_vq); > + } > mutex_unlock(&vb->balloon_lock); > > return num_allocated_pages; > @@ -184,15 +278,19 @@ static void release_pages_balloon(struct virtio_balloon *vb, > } > } > > -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) > +static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num, > + bool use_bmap) > { > unsigned num_freed_pages; > struct page *page; > struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; > LIST_HEAD(pages); > > - /* We can only do one array worth at a time. */ > - num = min(num, ARRAY_SIZE(vb->pfns)); > + if (use_bmap) > + init_pfn_range(vb); > + else > + /* We can only do one array worth at a time. */ > + num = min(num, ARRAY_SIZE(vb->pfns)); > > mutex_lock(&vb->balloon_lock); > for (vb->num_pfns = 0; vb->num_pfns < num; > @@ -200,7 +298,10 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) > page = balloon_page_dequeue(vb_dev_info); > if (!page) > break; > - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > + if (use_bmap) > + update_pfn_range(vb, page); > + else > + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); > list_add(&page->lru, &pages); > vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; > } > @@ -211,9 +312,14 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) > * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); > * is true, we *have* to do it in this order > */ > - if (vb->num_pfns != 0) > - tell_host(vb, vb->deflate_vq); > - release_pages_balloon(vb, &pages); > + if (vb->num_pfns != 0) { > + if (use_bmap) > + set_page_bitmap(vb, &pages, vb->deflate_vq); > + else > + tell_host(vb, vb->deflate_vq); > + > + release_pages_balloon(vb, &pages); > + } > mutex_unlock(&vb->balloon_lock); > return num_freed_pages; > } > @@ -347,13 +453,15 @@ static int virtballoon_oom_notify(struct notifier_block *self, > struct virtio_balloon *vb; > unsigned long *freed; > unsigned num_freed_pages; > + bool use_bmap; > > vb = container_of(self, struct virtio_balloon, nb); > if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) > return NOTIFY_OK; > > freed = parm; > - num_freed_pages = leak_balloon(vb, oom_pages); > + use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP); > + num_freed_pages = leak_balloon(vb, oom_pages, use_bmap); > update_balloon_size(vb); > *freed += num_freed_pages; > > @@ -373,15 +481,17 @@ static void update_balloon_size_func(struct work_struct *work) > { > struct virtio_balloon *vb; > s64 diff; > + bool use_bmap; > > vb = container_of(work, struct virtio_balloon, > update_balloon_size_work); > + use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP); > diff = towards_target(vb); > > if (diff > 0) > - diff -= fill_balloon(vb, diff); > + diff -= fill_balloon(vb, diff, use_bmap); > else if (diff < 0) > - diff += leak_balloon(vb, -diff); > + diff += leak_balloon(vb, -diff, use_bmap); > update_balloon_size(vb); > > if (diff) > @@ -508,6 +618,13 @@ static int virtballoon_probe(struct virtio_device *vdev) > spin_lock_init(&vb->stop_update_lock); > vb->stop_update = false; > vb->num_pages = 0; > + vb->bmap_len = ALIGN(VIRTIO_BALLOON_PFNS_LIMIT, BITS_PER_LONG) / > + BITS_PER_BYTE + 2 * sizeof(unsigned long); > + vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL); > + if (!vb->page_bitmap) { > + err = -ENOMEM; > + goto out; > + } How about we clear the bitmap feature on this failure? > mutex_init(&vb->balloon_lock); > init_waitqueue_head(&vb->acked); > vb->vdev = vdev; > @@ -541,9 +658,12 @@ out: > > static void remove_common(struct virtio_balloon *vb) > { > + bool use_bmap; > + > + use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP); > /* There might be pages left in the balloon: free them. */ > while (vb->num_pages) > - leak_balloon(vb, vb->num_pages); > + leak_balloon(vb, vb->num_pages, use_bmap); > update_balloon_size(vb); > > /* Now we reset the device so we can clean up the queues. */ > @@ -565,6 +685,7 @@ static void virtballoon_remove(struct virtio_device *vdev) > cancel_work_sync(&vb->update_balloon_stats_work); > > remove_common(vb); > + kfree(vb->page_bitmap); > kfree(vb); > } > > @@ -603,6 +724,7 @@ static unsigned int features[] = { > VIRTIO_BALLOON_F_MUST_TELL_HOST, > VIRTIO_BALLOON_F_STATS_VQ, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > + VIRTIO_BALLOON_F_PAGE_BITMAP, > }; > > static struct virtio_driver virtio_balloon_driver = { > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > index 343d7dd..f78fa47 100644 > --- a/include/uapi/linux/virtio_balloon.h > +++ b/include/uapi/linux/virtio_balloon.h > @@ -34,6 +34,7 @@ > #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > +#define VIRTIO_BALLOON_F_PAGE_BITMAP 3 /* Send page info with bitmap */ > > /* Size of a PFN in the balloon interface. */ > #define VIRTIO_BALLOON_PFN_SHIFT 12 > -- > 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html