From: Nitesh Narayan Lal <nilal@xxxxxxxxxx> This patch creates a new vq (hinting_vq) to be used for page hinting and adds support in the existing virtio balloon infrastructure so that the hyper list carrying pages which are supposed to be freed could be sent to the host (QEMU) for processing by using hinting_vq. Signed-off-by: Nitesh Narayan Lal <nilal@xxxxxxxxxx> --- drivers/virtio/virtio_balloon.c | 99 ++++++++++++++++++++++++++++++++++++- include/linux/page_hinting.h | 16 ++++++ include/uapi/linux/virtio_balloon.h | 1 + virt/kvm/page_hinting.c | 36 ++++++-------- 4 files changed, 129 insertions(+), 23 deletions(-) create mode 100644 include/linux/page_hinting.h diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7960746..7aef032 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -32,6 +32,7 @@ #include <linux/mm.h> #include <linux/mount.h> #include <linux/magic.h> +#include <linux/page_hinting.h> /* * Balloon device works in 4K page units. So each page is pointed to by @@ -53,8 +54,11 @@ static struct vfsmount *balloon_mnt; struct virtio_balloon { struct virtio_device *vdev; +#ifdef CONFIG_KVM_FREE_PAGE_HINTING + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq; +#else struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; - +#endif /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; struct work_struct update_balloon_size_work; @@ -95,6 +99,33 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +#ifdef CONFIG_KVM_FREE_PAGE_HINTING +static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq, + u64 gvaddr, int len) +{ + unsigned int id = VIRTQUEUE_DESC_ID_INIT; + u64 gpaddr = virt_to_phys((void *)gvaddr); + + virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0); + virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL); +} + +void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries) +{ + u64 gvaddr = (u64)hypervisor_pagelist; + + vb->num_pfns = hyper_entries; + tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries); +} + +static void hinting_ack(struct virtqueue *vq) +{ + struct virtio_balloon *vb = vq->vdev->priv; + + wake_up(&vb->acked); +} +#endif + static u32 page_to_balloon_pfn(struct page *page) { unsigned long pfn = page_to_pfn(page); @@ -416,6 +447,62 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +#ifdef CONFIG_KVM_FREE_PAGE_HINTING +static int init_vqs(struct virtio_balloon *vb) +{ + struct virtqueue *vqs[4]; + vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request, + hinting_ack }; + static const char * const names[] = { "inflate", "deflate", "stats", + "hinting" }; + int err, nvqs; + bool stats_vq_support, page_hinting_support; + + /* + * We expect two virtqueues: inflate and deflate, and + * optionally stat and hinting. + */ + stats_vq_support = virtio_has_feature(vb->vdev, + VIRTIO_BALLOON_F_STATS_VQ); + page_hinting_support = virtio_has_feature(vb->vdev, + VIRTIO_GUEST_PAGE_HINTING_VQ + ); + if (stats_vq_support && page_hinting_support) + nvqs = 4; + else if (stats_vq_support || page_hinting_support) + nvqs = 3; + else + nvqs = 2; + + err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); + if (err) + return err; + + vb->inflate_vq = vqs[0]; + vb->deflate_vq = vqs[1]; + if (page_hinting_support) + vb->hinting_vq = vqs[3]; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + struct scatterlist sg; + unsigned int num_stats; + + vb->stats_vq = vqs[2]; + + /* + * Prime this virtqueue with one buffer so the hypervisor can + * use it to signal us later (it can't be broken yet!). + */ + num_stats = update_balloon_stats(vb); + + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); + if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) + < 0) + BUG(); + virtqueue_kick(vb->stats_vq); + } + return 0; +} +#else static int init_vqs(struct virtio_balloon *vb) { struct virtqueue *vqs[3]; @@ -454,6 +541,8 @@ static int init_vqs(struct virtio_balloon *vb) return 0; } +#endif + #ifdef CONFIG_BALLOON_COMPACTION /* * virtballoon_migratepage - perform the balloon page migration on behalf of @@ -595,6 +684,13 @@ static int virtballoon_probe(struct virtio_device *vdev) virtio_device_ready(vdev); +#ifdef CONFIG_KVM_FREE_PAGE_HINTING + if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) { + request_hypercall = (void *)&virtballoon_page_hinting; + balloon_ptr = vb; + } +#endif + if (towards_target(vb)) virtballoon_changed(vdev); return 0; @@ -683,6 +779,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, + VIRTIO_GUEST_PAGE_HINTING_VQ, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h new file mode 100644 index 0000000..0bfb646 --- /dev/null +++ b/include/linux/page_hinting.h @@ -0,0 +1,16 @@ +#define MAX_FGPT_ENTRIES 1000 +/* + * hypervisor_pages - It is a dummy structure passed with the hypercall. + * @pfn - page frame number for the page which is to be freed. + * @pages - number of pages which are supposed to be freed. + * A global array object is used to to hold the list of pfn and pages and is + * passed as part of the hypercall. + */ +struct hypervisor_pages { + unsigned long pfn; + unsigned int pages; +}; + +extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES]; +extern void (*request_hypercall)(void *, int); +extern void *balloon_ptr; diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 343d7dd..45bdcc8 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -34,6 +34,7 @@ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ +#define VIRTIO_GUEST_PAGE_HINTING_VQ 3 /* Page hinting virtqueue */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c index 54fe6bc..22c892b 100644 --- a/virt/kvm/page_hinting.c +++ b/virt/kvm/page_hinting.c @@ -5,8 +5,8 @@ #include <linux/sort.h> #include <linux/kernel.h> #include <trace/events/kmem.h> +#include <linux/page_hinting.h> -#define MAX_FGPT_ENTRIES 1000 #define HYPERLIST_THRESHOLD 500 /* * struct kvm_free_pages - Tracks the pages which are freed by the guest. @@ -21,22 +21,15 @@ struct kvm_free_pages { unsigned int pages; }; -/* - * hypervisor_pages - It is a dummy structure passed with the hypercall. - * @pfn - page frame number for the page which is to be freed. - * @pages - number of pages which are supposed to be freed. - * A global array object is used to to hold the list of pfn and pages and is - * passed as part of the hypercall. - */ -struct hypervisor_pages { - unsigned long pfn; - unsigned int pages; -}; - static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock); DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt); DEFINE_PER_CPU(int, kvm_pt_idx); struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES]; +EXPORT_SYMBOL(hypervisor_pagelist); +void (*request_hypercall)(void *, int); +EXPORT_SYMBOL(request_hypercall); +void *balloon_ptr; +EXPORT_SYMBOL(balloon_ptr); static void empty_hyperlist(void) { @@ -49,13 +42,11 @@ static void empty_hyperlist(void) } } -void make_hypercall(void) +void hyperlist_ready(int entries) { - /* - * Dummy function: Tobe filled later. - */ - empty_hyperlist(); trace_guest_str_dump("Hypercall to host...:"); + request_hypercall(balloon_ptr, entries); + empty_hyperlist(); } static int sort_pfn(const void *a1, const void *b1) @@ -156,7 +147,7 @@ int compress_hyperlist(void) if (merge_counter != 0) ret = pack_hyperlist() - 1; else - ret = MAX_FGPT_ENTRIES - 1; + ret = MAX_FGPT_ENTRIES; return ret; } @@ -227,16 +218,16 @@ void arch_free_page_slowpath(void) */ if (!prev_free) { hyper_idx++; - hypervisor_pagelist[hyper_idx].pfn = pfn; - hypervisor_pagelist[hyper_idx].pages = 1; trace_guest_free_page_slowpath( hypervisor_pagelist[hyper_idx].pfn, hypervisor_pagelist[hyper_idx].pages); + hypervisor_pagelist[hyper_idx].pfn = pfn; + hypervisor_pagelist[hyper_idx].pages = 1; if (hyper_idx == MAX_FGPT_ENTRIES - 1) { hyper_idx = compress_hyperlist(); if (hyper_idx >= HYPERLIST_THRESHOLD) { - make_hypercall(); + hyperlist_ready(hyper_idx); hyper_idx = 0; } } @@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order) * free pages is full and a hypercall will be made. Until complete free * page list is traversed no further allocaiton will be allowed. */ + do { seq = read_seqbegin(&guest_page_lock); } while (read_seqretry(&guest_page_lock, seq)); -- 2.9.4