Re: [RFC][Patch V7 5/7] KVM: Sending hyperlist to the host via hinting_vq

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 12 Jun 2018 11:02:27 -0400
Nitesh Narayan Lal <nilal@xxxxxxxxxx> wrote:

> On 06/11/2018 04:26 PM, Luiz Capitulino wrote:
> > On Mon, 11 Jun 2018 11:19:00 -0400
> > nilal@xxxxxxxxxx wrote:
> >  
> >> From: Nitesh Narayan Lal <nilal@xxxxxxxxxx>
> >>
> >> This patch creates a new vq (hinting_vq) to be used for page hinting
> >> and adds support in the existing virtio balloon infrastructure so
> >> that the hyper list carrying pages which are supposed to be freed
> >> could be sent to the host (QEMU) for processing by using hinting_vq.
> >>
> >> Signed-off-by: Nitesh Narayan Lal <nilal@xxxxxxxxxx>
> >> ---
> >>  drivers/virtio/virtio_balloon.c     | 99 ++++++++++++++++++++++++++++++++++++-
> >>  include/linux/page_hinting.h        | 16 ++++++
> >>  include/uapi/linux/virtio_balloon.h |  1 +
> >>  virt/kvm/page_hinting.c             | 36 ++++++--------
> >>  4 files changed, 129 insertions(+), 23 deletions(-)
> >>  create mode 100644 include/linux/page_hinting.h
> >>
> >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> >> index 6b237e3..217523f 100644
> >> --- a/drivers/virtio/virtio_balloon.c
> >> +++ b/drivers/virtio/virtio_balloon.c
> >> @@ -32,6 +32,7 @@
> >>  #include <linux/mm.h>
> >>  #include <linux/mount.h>
> >>  #include <linux/magic.h>
> >> +#include <linux/page_hinting.h>
> >>  
> >>  /*
> >>   * Balloon device works in 4K page units.  So each page is pointed to by
> >> @@ -53,8 +54,11 @@ static struct vfsmount *balloon_mnt;
> >>  
> >>  struct virtio_balloon {
> >>  	struct virtio_device *vdev;
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq;
> >> +#else
> >>  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> >> -
> >> +#endif  
> > This is duplicating the exiting virtqueue pointers. It's only hinting_vq
> > that has to be protected by CONFIG_KVM_FREE_PAGE_HINTING.  
> I will move hinting_vq into a separate statement, so that other
> virtqueue pointers are not duplicated.
> >  
> >>  	/* The balloon servicing is delegated to a freezable workqueue. */
> >>  	struct work_struct update_balloon_stats_work;
> >>  	struct work_struct update_balloon_size_work;
> >> @@ -95,6 +99,33 @@ static struct virtio_device_id id_table[] = {
> >>  	{ 0 },
> >>  };
> >>  
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
> >> +			       u64 gvaddr, int len)
> >> +{  
> > I think this function actually tells the host the whole array at once,
> > right (vs. a single page). In this case, maybe it should be renamed to
> > tell_host_free_pfns?  
> It communicates the address where the first entry of hypervisor_pagelist
> is stored. Will it make sense if I rename it to tell_host_free_list?

Yes, that's good.

> >  
> >> +	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
> >> +	u64 gpaddr = virt_to_phys((void *)gvaddr);
> >> +
> >> +	virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0);
> >> +	virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL);
> >> +}
> >> +
> >> +void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
> >> +{
> >> +	u64 gvaddr = (u64)hypervisor_pagelist;
> >> +
> >> +	vb->num_pfns = hyper_entries;
> >> +	tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries);
> >> +}
> >> +
> >> +static void hinting_ack(struct virtqueue *vq)
> >> +{
> >> +	struct virtio_balloon *vb = vq->vdev->priv;
> >> +
> >> +	wake_up(&vb->acked);  
> > Honest question: is there a process to be woken up? If I understood
> > correctly, the guest busy-waits for the host ACK, so the calling process is
> > never put to sleep. Or did I misunderstand?  
> Yes, It is not required. I will remove this.
> >  
> >> +}
> >> +#endif
> >> +
> >>  static u32 page_to_balloon_pfn(struct page *page)
> >>  {
> >>  	unsigned long pfn = page_to_pfn(page);
> >> @@ -425,6 +456,62 @@ static void update_balloon_size_func(struct work_struct *work)
> >>  		queue_work(system_freezable_wq, work);
> >>  }
> >>  
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +static int init_vqs(struct virtio_balloon *vb)
> >> +{  
> > I don't think you can duplicate init_vqs().  
> I will use a single init_vqs with ifdefs to add the code corresponding
> to page hinting in my next patch-series. Thank you for pointing this out.
> >  
> >> +	struct virtqueue *vqs[4];
> >> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request,
> >> +				       hinting_ack };
> >> +	static const char * const names[] = { "inflate", "deflate", "stats",
> >> +					      "hinting" };
> >> +	int err, nvqs;
> >> +	bool stats_vq_support, page_hinting_support;
> >> +
> >> +	/*
> >> +	 * We expect two virtqueues: inflate and deflate, and
> >> +	 * optionally stat and hinting.
> >> +	 */
> >> +	stats_vq_support = virtio_has_feature(vb->vdev,
> >> +					      VIRTIO_BALLOON_F_STATS_VQ);
> >> +	page_hinting_support = virtio_has_feature(vb->vdev,
> >> +						  VIRTIO_GUEST_PAGE_HINTING_VQ
> >> +						  );
> >> +	if (stats_vq_support && page_hinting_support)
> >> +		nvqs = 4;
> >> +	else if (stats_vq_support || page_hinting_support)
> >> +		nvqs = 3;
> >> +	else
> >> +		nvqs = 2;
> >> +
> >> +	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
> >> +	if (err)
> >> +		return err;
> >> +
> >> +	vb->inflate_vq = vqs[0];
> >> +	vb->deflate_vq = vqs[1];
> >> +	if (page_hinting_support)
> >> +		vb->hinting_vq = vqs[3];
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> >> +		struct scatterlist sg;
> >> +		unsigned int num_stats;
> >> +
> >> +		vb->stats_vq = vqs[2];
> >> +
> >> +		/*
> >> +		 * Prime this virtqueue with one buffer so the hypervisor can
> >> +		 * use it to signal us later (it can't be broken yet!).
> >> +		 */
> >> +		num_stats = update_balloon_stats(vb);
> >> +
> >> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
> >> +		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
> >> +		    < 0)
> >> +			BUG();
> >> +		virtqueue_kick(vb->stats_vq);
> >> +	}
> >> +	return 0;
> >> +}
> >> +#else
> >>  static int init_vqs(struct virtio_balloon *vb)
> >>  {
> >>  	struct virtqueue *vqs[3];
> >> @@ -463,6 +550,8 @@ static int init_vqs(struct virtio_balloon *vb)
> >>  	return 0;
> >>  }
> >>  
> >> +#endif
> >> +
> >>  #ifdef CONFIG_BALLOON_COMPACTION
> >>  /*
> >>   * virtballoon_migratepage - perform the balloon page migration on behalf of
> >> @@ -604,6 +693,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >>  
> >>  	virtio_device_ready(vdev);
> >>  
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
> >> +		request_hypercall = (void *)&virtballoon_page_hinting;
> >> +		balloon_ptr = vb;
> >> +	}
> >> +#endif  
> > Very minor, but I think it would be nicer if you used a function from
> > virt/kvm/page_hinting.c instead of assigning global variables. Say:
> >
> > page_hinting_enable(vb, virtballoon_page_hinting);  
> I will make this change.
> >  
> >> +
> >>  	if (towards_target(vb))
> >>  		virtballoon_changed(vdev);
> >>  	return 0;
> >> @@ -692,6 +788,7 @@ static unsigned int features[] = {
> >>  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
> >>  	VIRTIO_BALLOON_F_STATS_VQ,
> >>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> >> +	VIRTIO_GUEST_PAGE_HINTING_VQ,
> >>  };
> >>  
> >>  static struct virtio_driver virtio_balloon_driver = {
> >> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
> >> new file mode 100644
> >> index 0000000..0bfb646
> >> --- /dev/null
> >> +++ b/include/linux/page_hinting.h
> >> @@ -0,0 +1,16 @@
> >> +#define MAX_FGPT_ENTRIES	1000
> >> +/*
> >> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
> >> + * @pfn - page frame number for the page which is to be freed.
> >> + * @pages - number of pages which are supposed to be freed.
> >> + * A global array object is used to to hold the list of pfn and pages and is
> >> + * passed as part of the hypercall.
> >> + */
> >> +struct hypervisor_pages {
> >> +	unsigned long pfn;
> >> +	unsigned int pages;
> >> +};
> >> +
> >> +extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> >> +extern void (*request_hypercall)(void *, int);
> >> +extern void *balloon_ptr;
> >> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> >> index 13b8cb5..16d299f 100644
> >> --- a/include/uapi/linux/virtio_balloon.h
> >> +++ b/include/uapi/linux/virtio_balloon.h
> >> @@ -34,6 +34,7 @@
> >>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
> >>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
> >>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> >> +#define VIRTIO_GUEST_PAGE_HINTING_VQ	3 /* Page hinting virtqueue */
> >>  
> >>  /* Size of a PFN in the balloon interface. */
> >>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> >> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
> >> index 97500bb..417582a 100644
> >> --- a/virt/kvm/page_hinting.c
> >> +++ b/virt/kvm/page_hinting.c
> >> @@ -5,8 +5,8 @@
> >>  #include <linux/sort.h>
> >>  #include <linux/kernel.h>
> >>  #include <trace/events/kmem.h>
> >> +#include <linux/page_hinting.h>
> >>  
> >> -#define MAX_FGPT_ENTRIES	1000
> >>  #define HYPERLIST_THRESHOLD	1	/* FIXME: find a good threshold */
> >>  /*
> >>   * struct kvm_free_pages - Tracks the pages which are freed by the guest.
> >> @@ -21,22 +21,15 @@ struct kvm_free_pages {
> >>  	unsigned int pages;
> >>  };
> >>  
> >> -/*
> >> - * hypervisor_pages - It is a dummy structure passed with the hypercall.
> >> - * @pfn - page frame number for the page which is to be freed.
> >> - * @pages - number of pages which are supposed to be freed.
> >> - * A global array object is used to to hold the list of pfn and pages and is
> >> - * passed as part of the hypercall.
> >> - */
> >> -struct hypervisor_pages {
> >> -	unsigned long pfn;
> >> -	unsigned int pages;
> >> -};
> >> -
> >>  static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
> >>  DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
> >>  DEFINE_PER_CPU(int, kvm_pt_idx);
> >>  struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> >> +EXPORT_SYMBOL(hypervisor_pagelist);
> >> +void (*request_hypercall)(void *, int);
> >> +EXPORT_SYMBOL(request_hypercall);
> >> +void *balloon_ptr;
> >> +EXPORT_SYMBOL(balloon_ptr);
> >>  
> >>  static void empty_hyperlist(void)
> >>  {
> >> @@ -49,13 +42,11 @@ static void empty_hyperlist(void)
> >>  	}
> >>  }
> >>  
> >> -void make_hypercall(void)
> >> +void hyperlist_ready(int entries)
> >>  {
> >> -	/*
> >> -	 * Dummy function: Tobe filled later.
> >> -	 */
> >> -	empty_hyperlist();
> >>  	trace_guest_str_dump("Hypercall to host...:");
> >> +	request_hypercall(balloon_ptr, entries);
> >> +	empty_hyperlist();
> >>  }
> >>  
> >>  static int sort_pfn(const void *a1, const void *b1)
> >> @@ -156,7 +147,7 @@ int compress_hyperlist(void)
> >>  	if (merge_counter != 0)
> >>  		ret = pack_hyperlist() - 1;
> >>  	else
> >> -		ret = MAX_FGPT_ENTRIES - 1;
> >> +		ret = MAX_FGPT_ENTRIES;
> >>  	return ret;
> >>  }
> >>  
> >> @@ -227,16 +218,16 @@ void arch_free_page_slowpath(void)
> >>  			 */
> >>  			if (!prev_free) {
> >>  				hyper_idx++;
> >> -				hypervisor_pagelist[hyper_idx].pfn = pfn;
> >> -				hypervisor_pagelist[hyper_idx].pages = 1;
> >>  				trace_guest_free_page_slowpath(
> >>  				hypervisor_pagelist[hyper_idx].pfn,
> >>  				hypervisor_pagelist[hyper_idx].pages);
> >> +				hypervisor_pagelist[hyper_idx].pfn = pfn;
> >> +				hypervisor_pagelist[hyper_idx].pages = 1;
> >>  				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
> >>  					hyper_idx =  compress_hyperlist();
> >>  					if (hyper_idx >=
> >>  					    HYPERLIST_THRESHOLD) {
> >> -						make_hypercall();
> >> +						hyperlist_ready(hyper_idx);
> >>  						hyper_idx = 0;
> >>  					}
> >>  				}
> >> @@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order)
> >>  	 * free pages is full and a hypercall will be made. Until complete free
> >>  	 * page list is traversed no further allocaiton will be allowed.
> >>  	 */
> >> +
> >>  	do {
> >>  		seq = read_seqbegin(&guest_page_lock);
> >>  	} while (read_seqretry(&guest_page_lock, seq));  
> 




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux