This patch adds support in the existing virtio balloon infrastructure so
that the hyperlist, once created, can be sent to the host (QEMU) for
processing over the deflate_vq.

Signed-off-by: Nitesh Narayan Lal <niteshnarayanlalleo@xxxxxxxxx>
---
 drivers/virtio/virtio_balloon.c |  48 +++++++++++
 drivers/virtio/virtio_ring.c    | 178 ++++++++++++++++++++++++++++++++++++++++
 include/linux/page_hinting.h    |  16 ++++
 include/linux/virtio.h          |  19 +++++
 virt/kvm/page_hinting.c         |  30 +++----
 5 files changed, 272 insertions(+), 19 deletions(-)
 create mode 100644 include/linux/page_hinting.h

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f0b3a0b..8828bc0 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -32,6 +32,7 @@
 #include <linux/mm.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
+#include <linux/page_hinting.h>
 
 /*
  * Balloon device works in 4K page units. So each page is pointed to by
@@ -95,6 +96,48 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+
+static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
+			       unsigned int pfn)
+{
+	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
+	u64 addr = (u64)pfn << VIRTIO_BALLOON_PFN_SHIFT;
+
+	virtqueue_add_chain_desc(vq, addr, PAGE_SIZE, &id, &id, 0);
+	virtqueue_add_chain(vq, id, 0, NULL, (void *)addr, NULL);
+	virtqueue_kick_sync(vq);
+}
+
+void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
+{
+	int i = 0, j = 0;
+	int pfn_cnt = 0;
+
+	for (i = 0; i < hyper_entries; i++) {
+		unsigned long pfn = hypervisor_pagelist[i].pfn;
+		unsigned long pfn_end = hypervisor_pagelist[i].pfn +
+					hypervisor_pagelist[i].pages - 1;
+		while (pfn <= pfn_end) {
+			if (pfn_cnt == VIRTIO_BALLOON_ARRAY_PFNS_MAX) {
+				j = 0;
+				while (j < pfn_cnt) {
+					tell_host_one_page(vb, vb->deflate_vq,
+							   vb->pfns[j]);
+					vb->pfns[j] = 0;
+					j++;
+				}
+				vb->num_pfns = 0;
+				pfn_cnt = 0;
+			}
+			vb->pfns[pfn_cnt++] = pfn;
+			vb->num_pfns++;
+			pfn++;
+		}
+	}
+}
+#endif
+
 static u32 page_to_balloon_pfn(struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
@@ -581,6 +624,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+	request_hypercall = (void *)&virtballoon_page_hinting;
+	balloon_ptr = vb;
+#endif
+
 	if (towards_target(vb))
 		virtballoon_changed(vdev);
 	return 0;
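The hinting loop above expands every (pfn, pages) entry of
hypervisor_pagelist into individual pfns, collects them in vb->pfns, and
flushes the array through tell_host_one_page() over the deflate_vq
whenever it reaches VIRTIO_BALLOON_ARRAY_PFNS_MAX entries. The same
batching scheme can be modeled in plain C; the sketch below is
illustrative only (hint_ranges(), flush_one() and ARRAY_PFNS_MAX are
stand-ins, not kernel symbols), and it additionally drains the last
partial batch after the ranges are exhausted:

#include <stdio.h>

#define ARRAY_PFNS_MAX 4	/* stands in for VIRTIO_BALLOON_ARRAY_PFNS_MAX */

struct hint_entry { unsigned long pfn; unsigned int pages; };

/* Stand-in for tell_host_one_page(): one pfn per flush. */
static void flush_one(unsigned long pfn)
{
	printf("  -> hint pfn %lu\n", pfn);
}

/* Expand (pfn, pages) ranges into pfns and flush in fixed-size batches. */
static void hint_ranges(const struct hint_entry *list, int entries)
{
	unsigned long batch[ARRAY_PFNS_MAX];
	int cnt = 0;

	for (int i = 0; i < entries; i++) {
		unsigned long pfn = list[i].pfn;
		unsigned long end = list[i].pfn + list[i].pages - 1;

		for (; pfn <= end; pfn++) {
			if (cnt == ARRAY_PFNS_MAX) {
				for (int j = 0; j < cnt; j++)
					flush_one(batch[j]);
				cnt = 0;
			}
			batch[cnt++] = pfn;
		}
	}

	/* Drain whatever is left in the final partial batch. */
	for (int j = 0; j < cnt; j++)
		flush_one(batch[j]);
}

int main(void)
{
	struct hint_entry list[] = { { .pfn = 100, .pages = 3 },
				     { .pfn = 200, .pages = 6 } };

	hint_ranges(list, 2);
	return 0;
}

Note that tell_host_one_page() submits and kicks one pfn at a time, so
the batching keeps the bookkeeping simple but still costs one host
notification per hinted page.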
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5e1b548..65306d5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -283,6 +283,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		return -EIO;
 	}
 
+	if (gfp == GFP_ATOMIC)
+		vq->indirect = false;
 #ifdef DEBUG
 	{
 		ktime_t now = ktime_get();
@@ -437,6 +439,131 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 }
 
 /**
+ * virtqueue_add_chain - expose a chain of buffers to the other end
+ * @_vq: the struct virtqueue we're talking about.
+ * @head: desc id of the chain head.
+ * @indirect: set if the chain consists of indirect descs.
+ * @indir_desc: the first indirect desc.
+ * @data: the token identifying the chain.
+ * @ctx: extra context for the token.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_chain(struct virtqueue *_vq,
+			unsigned int head,
+			bool indirect,
+			struct vring_desc *indir_desc,
+			void *data,
+			void *ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* The desc chain is empty. */
+	if (head == VIRTQUEUE_DESC_ID_INIT)
+		return 0;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	vq->desc_state[head].data = data;
+	if (indirect)
+		vq->desc_state[head].indir_desc = indir_desc;
+	if (ctx)
+		vq->desc_state[head].indir_desc = ctx;
+	END_USE(vq);
+
+	virtqueue_kick(_vq);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_chain);
+
+/**
+ * virtqueue_add_chain_desc - add a buffer to a chain using a vring desc
+ * @_vq: the struct virtqueue we're talking about.
+ * @addr: address of the buffer to add.
+ * @len: length of the buffer.
+ * @head_id: desc id of the chain head.
+ * @prev_id: desc id of the previous buffer.
+ * @in: set if the buffer is for the device to write.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_chain_desc(struct virtqueue *_vq,
+			     u64 addr,
+			     u32 len,
+			     unsigned int *head_id,
+			     unsigned int *prev_id,
+			     bool in)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc = vq->vring.desc;
+	u16 flags = in ? VRING_DESC_F_WRITE : 0;
+	unsigned int i;
+
+	/* Sanity check */
+	if (!head_id || !prev_id)
+		return -EINVAL;
+retry:
+	START_USE(vq);
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	if (vq->vq.num_free < 1) {
+		/*
+		 * If there is no desc avail in the vq, kick what is
+		 * already added, and re-start to build a new chain for
+		 * the passed sg.
+		 */
+		if (likely(*head_id != VIRTQUEUE_DESC_ID_INIT)) {
+			END_USE(vq);
+			virtqueue_add_chain(_vq, *head_id, 0, NULL, vq, NULL);
+			virtqueue_kick_sync(_vq);
+			*head_id = VIRTQUEUE_DESC_ID_INIT;
+			*prev_id = VIRTQUEUE_DESC_ID_INIT;
+			goto retry;
+		} else {
+			END_USE(vq);
+			return -ENOSPC;
+		}
+	}
+
+	i = vq->free_head;
+	flags &= ~VRING_DESC_F_NEXT;
+	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
+	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
+	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
+
+	/* Add the desc to the end of the chain */
+	if (*prev_id != VIRTQUEUE_DESC_ID_INIT) {
+		desc[*prev_id].next = cpu_to_virtio16(_vq->vdev, i);
+		desc[*prev_id].flags |= cpu_to_virtio16(_vq->vdev,
+							VRING_DESC_F_NEXT);
+	}
+	*prev_id = i;
+	if (*head_id == VIRTQUEUE_DESC_ID_INIT)
+		*head_id = *prev_id;
+
+	vq->vq.num_free--;
+	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
+	END_USE(vq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_chain_desc);
+
+/**
  * virtqueue_add_sgs - expose buffers to other end
  * @vq: the struct virtqueue we're talking about.
  * @sgs: array of terminated scatterlists.
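Taken together, virtqueue_add_chain_desc() grows a descriptor chain one
buffer at a time (allocating descs from free_head and linking them via
VRING_DESC_F_NEXT), and virtqueue_add_chain() then attaches the caller's
token to the chain head and kicks the device. A typical caller would
look like the sketch below (hint_two_buffers() and its arguments are
hypothetical, not part of this patch):

/* Illustrative only: chain two guest buffers and publish them on @vq. */
static int hint_two_buffers(struct virtqueue *vq, u64 addr1, u32 len1,
			    u64 addr2, u32 len2, void *token)
{
	unsigned int head = VIRTQUEUE_DESC_ID_INIT;
	unsigned int prev = VIRTQUEUE_DESC_ID_INIT;
	int err;

	/* Each call appends one desc; head/prev track both chain ends. */
	err = virtqueue_add_chain_desc(vq, addr1, len1, &head, &prev, 0);
	if (err)
		return err;
	err = virtqueue_add_chain_desc(vq, addr2, len2, &head, &prev, 0);
	if (err)
		return err;

	/* Publish the whole chain as one request identified by token. */
	return virtqueue_add_chain(vq, head, 0, NULL, token, NULL);
}

tell_host_one_page() above is the degenerate single-desc case of this
pattern: it passes the same id as both head and prev.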
@@ -608,6 +735,57 @@ bool virtqueue_notify(struct virtqueue *_vq)
 EXPORT_SYMBOL_GPL(virtqueue_notify);
 
 /**
+ * virtqueue_kick_async - update after add_buf and sleep till update is done
+ * @vq: the struct virtqueue
+ * @wq: the wait queue to sleep on till the other side is done
+ *
+ * After one or more virtqueue_add_* calls, invoke this to kick
+ * the other side. Sleep till the other side is done with the update.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
+ */
+bool virtqueue_kick_async(struct virtqueue *vq, wait_queue_head_t *wq)
+{
+	u32 len;
+
+	if (likely(virtqueue_kick(vq))) {
+		wait_event(*wq, virtqueue_get_buf(vq, &len));
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_async);
+
+/**
+ * virtqueue_kick_sync - update after add_buf and busy wait till update is done
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_* calls, invoke this to kick
+ * the other side. Busy wait till the other side is done with the update.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
+ */
+bool virtqueue_kick_sync(struct virtqueue *vq)
+{
+	u32 len;
+
+	if (likely(virtqueue_kick(vq))) {
+		while (!virtqueue_get_buf(vq, &len) &&
+		       !virtqueue_is_broken(vq))
+			cpu_relax();
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
+
+/**
  * virtqueue_kick - update after add_buf
  * @vq: the struct virtqueue
  *
diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
new file mode 100644
index 0000000..0bfb646
--- /dev/null
+++ b/include/linux/page_hinting.h
@@ -0,0 +1,16 @@
+#define MAX_FGPT_ENTRIES	1000
+/*
+ * hypervisor_pages - It is a dummy structure passed with the hypercall.
+ * @pfn - page frame number for the page which is to be freed.
+ * @pages - number of pages which are supposed to be freed.
+ * A global array object is used to hold the list of pfn and pages and is
+ * passed as part of the hypercall.
+ */
+struct hypervisor_pages {
+	unsigned long pfn;
+	unsigned int pages;
+};
+
+extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+extern void (*request_hypercall)(void *, int);
+extern void *balloon_ptr;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 28b0e96..bc95e84 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -56,6 +56,25 @@ int virtqueue_add_sgs(struct virtqueue *vq,
 		      unsigned int in_sgs,
 		      void *data,
 		      gfp_t gfp);
 
+/* A desc with this init id is treated as an invalid desc */
+#define VIRTQUEUE_DESC_ID_INIT		UINT_MAX
+int virtqueue_add_chain_desc(struct virtqueue *_vq,
+			     u64 addr,
+			     u32 len,
+			     unsigned int *head_id,
+			     unsigned int *prev_id,
+			     bool in);
+
+int virtqueue_add_chain(struct virtqueue *_vq,
+			unsigned int head,
+			bool indirect,
+			struct vring_desc *indirect_desc,
+			void *data,
+			void *ctx);
+
+bool virtqueue_kick_sync(struct virtqueue *vq);
+
+bool virtqueue_kick_async(struct virtqueue *vq, wait_queue_head_t *wq);
 bool virtqueue_kick(struct virtqueue *vq);
 
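virtqueue_kick_sync() busy-waits with cpu_relax() until the host consumes
the buffer; it is what tell_host_one_page() uses. virtqueue_kick_async()
sleeps on the caller's wait queue instead, so it relies on a wake_up()
from elsewhere, in practice a virtqueue callback registered when the
queue is set up (for example through find_vqs). A sketch of the async
variant follows; hint_wq, hint_vq_done() and hint_kick() are hypothetical
names, not part of this patch:

static DECLARE_WAIT_QUEUE_HEAD(hint_wq);

/* Virtqueue callback: the host used the buffers, let the waiter recheck. */
static void hint_vq_done(struct virtqueue *vq)
{
	wake_up(&hint_wq);
}

/* Sleeping kick for process context; busy-wait fallback otherwise. */
static bool hint_kick(struct virtqueue *vq, bool may_sleep)
{
	if (may_sleep)
		return virtqueue_kick_async(vq, &hint_wq);
	return virtqueue_kick_sync(vq);
}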
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index 090b363..ebc2e8b 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -5,8 +5,8 @@
 #include <linux/sort.h>
 #include <linux/kernel.h>
 #include <trace/events/kvm.h>
+#include <linux/page_hinting.h>
 
-#define MAX_FGPT_ENTRIES	1000
 #define HYPERLIST_THRESHOLD	500
 /*
  * struct kvm_free_pages - Tracks the pages which are freed by the guest.
@@ -21,22 +21,12 @@ struct kvm_free_pages {
 	unsigned int pages;
 };
 
-/*
- * hypervisor_pages - It is a dummy structure passed with the hypercall.
- * @pfn - page frame number for the page which is to be freed.
- * @pages - number of pages which are supposed to be freed.
- * A global array object is used to to hold the list of pfn and pages and is
- * passed as part of the hypercall.
- */
-struct hypervisor_pages {
-	unsigned long pfn;
-	unsigned int pages;
-};
-
 static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
 DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
 DEFINE_PER_CPU(int, kvm_pt_idx);
 struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+void (*request_hypercall)(void *, int);
+void *balloon_ptr;
 
 static void empty_hyperlist(void)
 {
@@ -49,13 +39,14 @@ static void empty_hyperlist(void)
 	}
 }
 
-static void make_hypercall(void)
+static void hyperlist_ready(int entries)
 {
 	/*
 	 * Dummy function: Tobe filled later.
 	 */
-	empty_hyperlist();
 	trace_guest_str_dump("Hypercall to host...:");
+	request_hypercall(balloon_ptr, entries);
+	empty_hyperlist();
 }
 
 static int sort_pfn(const void *a1, const void *b1)
@@ -156,7 +147,7 @@ int compress_hyperlist(void)
 	if (merge_counter != 0)
 		ret = pack_hyperlist() - 1;
 	else
-		ret = MAX_FGPT_ENTRIES - 1;
+		ret = MAX_FGPT_ENTRIES;
 	return ret;
 }
 
@@ -227,16 +218,16 @@ void arch_free_page_slowpath(void)
 			 */
 			if (!prev_free) {
 				hyper_idx++;
-				hypervisor_pagelist[hyper_idx].pfn = pfn;
-				hypervisor_pagelist[hyper_idx].pages = 1;
 				trace_guest_free_page_slowpath(
 					hypervisor_pagelist[hyper_idx].pfn,
 					hypervisor_pagelist[hyper_idx].pages);
+				hypervisor_pagelist[hyper_idx].pfn = pfn;
+				hypervisor_pagelist[hyper_idx].pages = 1;
 				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
 					hyper_idx = compress_hyperlist();
 					if (hyper_idx >= HYPERLIST_THRESHOLD) {
-						make_hypercall();
+						hyperlist_ready(hyper_idx);
 						hyper_idx = 0;
 					}
 				}
@@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order)
 	 * free pages is full and a hypercall will be made. Until complete free
 	 * page list is traversed no further allocaiton will be allowed.
 	 */
+
 	do {
 		seq = read_seqbegin(&guest_page_lock);
 	} while (read_seqretry(&guest_page_lock, seq));
--
2.9.4
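End to end, the control flow wired up by this patch is:
arch_free_page_slowpath() fills hypervisor_pagelist, hyperlist_ready()
calls through the request_hypercall pointer that virtballoon_probe()
pointed at virtballoon_page_hinting() (with balloon_ptr as the opaque
argument), and only then empties the list. The indirection can be
modeled in user space; everything below (hint_entry, driver_hint(), the
array size) is illustrative, not kernel code:

#include <stdio.h>

/* Illustrative stand-ins for the globals this patch wires together. */
struct hint_entry { unsigned long pfn; unsigned int pages; };
static struct hint_entry hyperlist[16];

static void (*request_hypercall)(void *, int);	/* set by the driver */
static void *balloon_ptr;			/* driver-private cookie */

/* Driver side: plays the role of virtballoon_page_hinting(). */
static void driver_hint(void *opaque, int entries)
{
	for (int i = 0; i < entries; i++)
		printf("hint: pfn %lu, %u pages (dev %p)\n",
		       hyperlist[i].pfn, hyperlist[i].pages, opaque);
}

/* Core side: plays the role of hyperlist_ready(). */
static void hyperlist_ready(int entries)
{
	if (request_hypercall)
		request_hypercall(balloon_ptr, entries);
	/* the real code empties the list here for reuse */
}

int main(void)
{
	static int dummy_dev;

	/* Probe time: the balloon driver registers itself. */
	request_hypercall = driver_hint;
	balloon_ptr = &dummy_dev;

	hyperlist[0] = (struct hint_entry){ .pfn = 4096, .pages = 2 };
	hyperlist_ready(1);
	return 0;
}

Registering a function pointer plus an opaque cookie keeps
virt/kvm/page_hinting.c free of any compile-time dependency on the
balloon driver, at the cost of the unchecked (void *) cast at the
registration site in virtballoon_probe().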