On Tue, 12 Jun 2018 11:02:27 -0400 Nitesh Narayan Lal <nilal@xxxxxxxxxx> wrote: > On 06/11/2018 04:26 PM, Luiz Capitulino wrote: > > On Mon, 11 Jun 2018 11:19:00 -0400 > > nilal@xxxxxxxxxx wrote: > > > >> From: Nitesh Narayan Lal <nilal@xxxxxxxxxx> > >> > >> This patch creates a new vq (hinting_vq) to be used for page hinting > >> and adds support in the existing virtio balloon infrastructure so > >> that the hyper list carrying pages which are supposed to be freed > >> could be sent to the host (QEMU) for processing by using hinting_vq. > >> > >> Signed-off-by: Nitesh Narayan Lal <nilal@xxxxxxxxxx> > >> --- > >> drivers/virtio/virtio_balloon.c | 99 ++++++++++++++++++++++++++++++++++++- > >> include/linux/page_hinting.h | 16 ++++++ > >> include/uapi/linux/virtio_balloon.h | 1 + > >> virt/kvm/page_hinting.c | 36 ++++++-------- > >> 4 files changed, 129 insertions(+), 23 deletions(-) > >> create mode 100644 include/linux/page_hinting.h > >> > >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > >> index 6b237e3..217523f 100644 > >> --- a/drivers/virtio/virtio_balloon.c > >> +++ b/drivers/virtio/virtio_balloon.c > >> @@ -32,6 +32,7 @@ > >> #include <linux/mm.h> > >> #include <linux/mount.h> > >> #include <linux/magic.h> > >> +#include <linux/page_hinting.h> > >> > >> /* > >> * Balloon device works in 4K page units. So each page is pointed to by > >> @@ -53,8 +54,11 @@ static struct vfsmount *balloon_mnt; > >> > >> struct virtio_balloon { > >> struct virtio_device *vdev; > >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING > >> + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq; > >> +#else > >> struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > >> - > >> +#endif > > This is duplicating the exiting virtqueue pointers. It's only hinting_vq > > that has to be protected by CONFIG_KVM_FREE_PAGE_HINTING. > I will move hinting_vq into a separate statement, so that other > virtqueue pointers are not duplicated. > > > >> /* The balloon servicing is delegated to a freezable workqueue. */ > >> struct work_struct update_balloon_stats_work; > >> struct work_struct update_balloon_size_work; > >> @@ -95,6 +99,33 @@ static struct virtio_device_id id_table[] = { > >> { 0 }, > >> }; > >> > >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING > >> +static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq, > >> + u64 gvaddr, int len) > >> +{ > > I think this function actually tells the host the whole array at once, > > right (vs. a single page). In this case, maybe it should be renamed to > > tell_host_free_pfns? > It communicates the address where the first entry of hypervisor_pagelist > is stored. Will it make sense if I rename it to tell_host_free_list? Yes, that's good. > > > >> + unsigned int id = VIRTQUEUE_DESC_ID_INIT; > >> + u64 gpaddr = virt_to_phys((void *)gvaddr); > >> + > >> + virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0); > >> + virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL); > >> +} > >> + > >> +void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries) > >> +{ > >> + u64 gvaddr = (u64)hypervisor_pagelist; > >> + > >> + vb->num_pfns = hyper_entries; > >> + tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries); > >> +} > >> + > >> +static void hinting_ack(struct virtqueue *vq) > >> +{ > >> + struct virtio_balloon *vb = vq->vdev->priv; > >> + > >> + wake_up(&vb->acked); > > Honest question: is there a process to be woken up? If I understood > > correctly, the guest busy-waits for the host ACK, so the calling process is > > never put to sleep. Or did I misunderstand? > Yes, It is not required. I will remove this. > > > >> +} > >> +#endif > >> + > >> static u32 page_to_balloon_pfn(struct page *page) > >> { > >> unsigned long pfn = page_to_pfn(page); > >> @@ -425,6 +456,62 @@ static void update_balloon_size_func(struct work_struct *work) > >> queue_work(system_freezable_wq, work); > >> } > >> > >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING > >> +static int init_vqs(struct virtio_balloon *vb) > >> +{ > > I don't think you can duplicate init_vqs(). > I will use a single init_vqs with ifdefs to add the code corresponding > to page hinting in my next patch-series. Thank you for pointing this out. > > > >> + struct virtqueue *vqs[4]; > >> + vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request, > >> + hinting_ack }; > >> + static const char * const names[] = { "inflate", "deflate", "stats", > >> + "hinting" }; > >> + int err, nvqs; > >> + bool stats_vq_support, page_hinting_support; > >> + > >> + /* > >> + * We expect two virtqueues: inflate and deflate, and > >> + * optionally stat and hinting. > >> + */ > >> + stats_vq_support = virtio_has_feature(vb->vdev, > >> + VIRTIO_BALLOON_F_STATS_VQ); > >> + page_hinting_support = virtio_has_feature(vb->vdev, > >> + VIRTIO_GUEST_PAGE_HINTING_VQ > >> + ); > >> + if (stats_vq_support && page_hinting_support) > >> + nvqs = 4; > >> + else if (stats_vq_support || page_hinting_support) > >> + nvqs = 3; > >> + else > >> + nvqs = 2; > >> + > >> + err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); > >> + if (err) > >> + return err; > >> + > >> + vb->inflate_vq = vqs[0]; > >> + vb->deflate_vq = vqs[1]; > >> + if (page_hinting_support) > >> + vb->hinting_vq = vqs[3]; > >> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > >> + struct scatterlist sg; > >> + unsigned int num_stats; > >> + > >> + vb->stats_vq = vqs[2]; > >> + > >> + /* > >> + * Prime this virtqueue with one buffer so the hypervisor can > >> + * use it to signal us later (it can't be broken yet!). > >> + */ > >> + num_stats = update_balloon_stats(vb); > >> + > >> + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); > >> + if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) > >> + < 0) > >> + BUG(); > >> + virtqueue_kick(vb->stats_vq); > >> + } > >> + return 0; > >> +} > >> +#else > >> static int init_vqs(struct virtio_balloon *vb) > >> { > >> struct virtqueue *vqs[3]; > >> @@ -463,6 +550,8 @@ static int init_vqs(struct virtio_balloon *vb) > >> return 0; > >> } > >> > >> +#endif > >> + > >> #ifdef CONFIG_BALLOON_COMPACTION > >> /* > >> * virtballoon_migratepage - perform the balloon page migration on behalf of > >> @@ -604,6 +693,13 @@ static int virtballoon_probe(struct virtio_device *vdev) > >> > >> virtio_device_ready(vdev); > >> > >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING > >> + if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) { > >> + request_hypercall = (void *)&virtballoon_page_hinting; > >> + balloon_ptr = vb; > >> + } > >> +#endif > > Very minor, but I think it would be nicer if you used a function from > > virt/kvm/page_hinting.c instead of assigning global variables. Say: > > > > page_hinting_enable(vb, virtballoon_page_hinting); > I will make this change. > > > >> + > >> if (towards_target(vb)) > >> virtballoon_changed(vdev); > >> return 0; > >> @@ -692,6 +788,7 @@ static unsigned int features[] = { > >> VIRTIO_BALLOON_F_MUST_TELL_HOST, > >> VIRTIO_BALLOON_F_STATS_VQ, > >> VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > >> + VIRTIO_GUEST_PAGE_HINTING_VQ, > >> }; > >> > >> static struct virtio_driver virtio_balloon_driver = { > >> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h > >> new file mode 100644 > >> index 0000000..0bfb646 > >> --- /dev/null > >> +++ b/include/linux/page_hinting.h > >> @@ -0,0 +1,16 @@ > >> +#define MAX_FGPT_ENTRIES 1000 > >> +/* > >> + * hypervisor_pages - It is a dummy structure passed with the hypercall. > >> + * @pfn - page frame number for the page which is to be freed. > >> + * @pages - number of pages which are supposed to be freed. > >> + * A global array object is used to to hold the list of pfn and pages and is > >> + * passed as part of the hypercall. > >> + */ > >> +struct hypervisor_pages { > >> + unsigned long pfn; > >> + unsigned int pages; > >> +}; > >> + > >> +extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES]; > >> +extern void (*request_hypercall)(void *, int); > >> +extern void *balloon_ptr; > >> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > >> index 13b8cb5..16d299f 100644 > >> --- a/include/uapi/linux/virtio_balloon.h > >> +++ b/include/uapi/linux/virtio_balloon.h > >> @@ -34,6 +34,7 @@ > >> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ > >> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > >> #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > >> +#define VIRTIO_GUEST_PAGE_HINTING_VQ 3 /* Page hinting virtqueue */ > >> > >> /* Size of a PFN in the balloon interface. */ > >> #define VIRTIO_BALLOON_PFN_SHIFT 12 > >> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c > >> index 97500bb..417582a 100644 > >> --- a/virt/kvm/page_hinting.c > >> +++ b/virt/kvm/page_hinting.c > >> @@ -5,8 +5,8 @@ > >> #include <linux/sort.h> > >> #include <linux/kernel.h> > >> #include <trace/events/kmem.h> > >> +#include <linux/page_hinting.h> > >> > >> -#define MAX_FGPT_ENTRIES 1000 > >> #define HYPERLIST_THRESHOLD 1 /* FIXME: find a good threshold */ > >> /* > >> * struct kvm_free_pages - Tracks the pages which are freed by the guest. > >> @@ -21,22 +21,15 @@ struct kvm_free_pages { > >> unsigned int pages; > >> }; > >> > >> -/* > >> - * hypervisor_pages - It is a dummy structure passed with the hypercall. > >> - * @pfn - page frame number for the page which is to be freed. > >> - * @pages - number of pages which are supposed to be freed. > >> - * A global array object is used to to hold the list of pfn and pages and is > >> - * passed as part of the hypercall. > >> - */ > >> -struct hypervisor_pages { > >> - unsigned long pfn; > >> - unsigned int pages; > >> -}; > >> - > >> static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock); > >> DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt); > >> DEFINE_PER_CPU(int, kvm_pt_idx); > >> struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES]; > >> +EXPORT_SYMBOL(hypervisor_pagelist); > >> +void (*request_hypercall)(void *, int); > >> +EXPORT_SYMBOL(request_hypercall); > >> +void *balloon_ptr; > >> +EXPORT_SYMBOL(balloon_ptr); > >> > >> static void empty_hyperlist(void) > >> { > >> @@ -49,13 +42,11 @@ static void empty_hyperlist(void) > >> } > >> } > >> > >> -void make_hypercall(void) > >> +void hyperlist_ready(int entries) > >> { > >> - /* > >> - * Dummy function: Tobe filled later. > >> - */ > >> - empty_hyperlist(); > >> trace_guest_str_dump("Hypercall to host...:"); > >> + request_hypercall(balloon_ptr, entries); > >> + empty_hyperlist(); > >> } > >> > >> static int sort_pfn(const void *a1, const void *b1) > >> @@ -156,7 +147,7 @@ int compress_hyperlist(void) > >> if (merge_counter != 0) > >> ret = pack_hyperlist() - 1; > >> else > >> - ret = MAX_FGPT_ENTRIES - 1; > >> + ret = MAX_FGPT_ENTRIES; > >> return ret; > >> } > >> > >> @@ -227,16 +218,16 @@ void arch_free_page_slowpath(void) > >> */ > >> if (!prev_free) { > >> hyper_idx++; > >> - hypervisor_pagelist[hyper_idx].pfn = pfn; > >> - hypervisor_pagelist[hyper_idx].pages = 1; > >> trace_guest_free_page_slowpath( > >> hypervisor_pagelist[hyper_idx].pfn, > >> hypervisor_pagelist[hyper_idx].pages); > >> + hypervisor_pagelist[hyper_idx].pfn = pfn; > >> + hypervisor_pagelist[hyper_idx].pages = 1; > >> if (hyper_idx == MAX_FGPT_ENTRIES - 1) { > >> hyper_idx = compress_hyperlist(); > >> if (hyper_idx >= > >> HYPERLIST_THRESHOLD) { > >> - make_hypercall(); > >> + hyperlist_ready(hyper_idx); > >> hyper_idx = 0; > >> } > >> } > >> @@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order) > >> * free pages is full and a hypercall will be made. Until complete free > >> * page list is traversed no further allocaiton will be allowed. > >> */ > >> + > >> do { > >> seq = read_seqbegin(&guest_page_lock); > >> } while (read_seqretry(&guest_page_lock, seq)); >