This patch adds support in the existing virtio balloon infrastructure so
that the hyperlist, once created, can be sent to the host (QEMU) for
processing over the deflate_vq.

Signed-off-by: Nitesh Narayan Lal <niteshnarayanlalleo@xxxxxxxxx>
---
 drivers/virtio/virtio_balloon.c |  48 +++++++++++
 drivers/virtio/virtio_ring.c    | 178 ++++++++++++++++++++++++++++++++++++++++
 include/linux/page_hinting.h    |  16 ++++
 include/linux/virtio.h          |  19 +++++
 virt/kvm/page_hinting.c         |  30 +++----
 5 files changed, 272 insertions(+), 19 deletions(-)
 create mode 100644 include/linux/page_hinting.h

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f0b3a0b..8828bc0 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -32,6 +32,7 @@
 #include <linux/mm.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
+#include <linux/page_hinting.h>
 
 /*
  * Balloon device works in 4K page units. So each page is pointed to by
@@ -95,6 +96,48 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+
+static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
+			       unsigned int pfn)
+{
+	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
+	u64 addr = (u64)pfn << VIRTIO_BALLOON_PFN_SHIFT;
+
+	virtqueue_add_chain_desc(vq, addr, PAGE_SIZE, &id, &id, 0);
+	virtqueue_add_chain(vq, id, 0, NULL, (void *)addr, NULL);
+	virtqueue_kick_sync(vq);
+}
+
+void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
+{
+	int i = 0, j = 0;
+	int pfn_cnt = 0;
+
+	for (i = 0; i < hyper_entries; i++) {
+		unsigned long pfn = hypervisor_pagelist[i].pfn;
+		unsigned long pfn_end = hypervisor_pagelist[i].pfn +
+					hypervisor_pagelist[i].pages - 1;
+		while (pfn <= pfn_end) {
+			if (pfn_cnt == VIRTIO_BALLOON_ARRAY_PFNS_MAX) {
+				j = 0;
+				while (j < pfn_cnt) {
+					tell_host_one_page(vb, vb->deflate_vq,
+							   vb->pfns[j]);
+					vb->pfns[j] = 0;
+					j++;
+				}
+				vb->num_pfns = 0;
+				pfn_cnt = 0;
+			}
+			vb->pfns[pfn_cnt++] = pfn;
+			vb->num_pfns++;
+			pfn++;
+		}
+	}
+}
+#endif
+
 static u32 page_to_balloon_pfn(struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
@@ -581,6 +624,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+	request_hypercall = (void *)&virtballoon_page_hinting;
+	balloon_ptr = vb;
+#endif
+
 	if (towards_target(vb))
 		virtballoon_changed(vdev);
 	return 0;
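The hinting loop above expands every (pfn, pages) entry of
hypervisor_pagelist into individual pfns, collects them in vb->pfns, and
flushes the array through tell_host_one_page() over the deflate_vq
whenever it reaches VIRTIO_BALLOON_ARRAY_PFNS_MAX entries. The same
batching scheme can be modeled in plain C; the sketch below is
illustrative only (hint_ranges(), flush_one() and ARRAY_PFNS_MAX are
stand-ins, not kernel symbols), and it additionally drains the last
partial batch after the ranges are exhausted:

#include <stdio.h>

#define ARRAY_PFNS_MAX 4	/* stands in for VIRTIO_BALLOON_ARRAY_PFNS_MAX */

struct hint_entry { unsigned long pfn; unsigned int pages; };

/* Stand-in for tell_host_one_page(): one pfn per flush. */
static void flush_one(unsigned long pfn)
{
	printf("  -> hint pfn %lu\n", pfn);
}

/* Expand (pfn, pages) ranges into pfns and flush in fixed-size batches. */
static void hint_ranges(const struct hint_entry *list, int entries)
{
	unsigned long batch[ARRAY_PFNS_MAX];
	int cnt = 0;

	for (int i = 0; i < entries; i++) {
		unsigned long pfn = list[i].pfn;
		unsigned long end = list[i].pfn + list[i].pages - 1;

		for (; pfn <= end; pfn++) {
			if (cnt == ARRAY_PFNS_MAX) {
				for (int j = 0; j < cnt; j++)
					flush_one(batch[j]);
				cnt = 0;
			}
			batch[cnt++] = pfn;
		}
	}

	/* Drain whatever is left in the final partial batch. */
	for (int j = 0; j < cnt; j++)
		flush_one(batch[j]);
}

int main(void)
{
	struct hint_entry list[] = { { .pfn = 100, .pages = 3 },
				     { .pfn = 200, .pages = 6 } };

	hint_ranges(list, 2);
	return 0;
}

Note that tell_host_one_page() submits and kicks one pfn at a time, so
the batching keeps the bookkeeping simple but still costs one host
notification per hinted page.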
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5e1b548..65306d5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -283,6 +283,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		return -EIO;
 	}
 
+	if (gfp == GFP_ATOMIC)
+		vq->indirect = false;
 #ifdef DEBUG
 	{
 		ktime_t now = ktime_get();
@@ -437,6 +439,131 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 }
 
 /**
+ * virtqueue_add_chain - expose a chain of buffers to the other end
+ * @_vq: the struct virtqueue we're talking about.
+ * @head: desc id of the chain head.
+ * @indirect: set if the chain consists of indirect descs.
+ * @indir_desc: the first indirect desc.
+ * @data: the token identifying the chain.
+ * @ctx: extra context for the token.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_chain(struct virtqueue *_vq,
+			unsigned int head,
+			bool indirect,
+			struct vring_desc *indir_desc,
+			void *data,
+			void *ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* The desc chain is empty. */
+	if (head == VIRTQUEUE_DESC_ID_INIT)
+		return 0;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	vq->desc_state[head].data = data;
+	if (indirect)
+		vq->desc_state[head].indir_desc = indir_desc;
+	if (ctx)
+		vq->desc_state[head].indir_desc = ctx;
+	END_USE(vq);
+
+	virtqueue_kick(_vq);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_chain);
+
+/**
+ * virtqueue_add_chain_desc - add a buffer to a chain using a vring desc
+ * @_vq: the struct virtqueue we're talking about.
+ * @addr: address of the buffer to add.
+ * @len: length of the buffer.
+ * @head_id: desc id of the chain head.
+ * @prev_id: desc id of the previous buffer.
+ * @in: set if the buffer is for the device to write.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_chain_desc(struct virtqueue *_vq,
+			     u64 addr,
+			     u32 len,
+			     unsigned int *head_id,
+			     unsigned int *prev_id,
+			     bool in)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc = vq->vring.desc;
+	u16 flags = in ? VRING_DESC_F_WRITE : 0;
+	unsigned int i;
+
+	/* Sanity check */
+	if (!head_id || !prev_id)
+		return -EINVAL;
+retry:
+	START_USE(vq);
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	if (vq->vq.num_free < 1) {
+		/*
+		 * If there is no desc avail in the vq, kick what is
+		 * already added, and re-start to build a new chain for
+		 * the passed sg.
+		 */
+		if (likely(*head_id != VIRTQUEUE_DESC_ID_INIT)) {
+			END_USE(vq);
+			virtqueue_add_chain(_vq, *head_id, 0, NULL, vq, NULL);
+			virtqueue_kick_sync(_vq);
+			*head_id = VIRTQUEUE_DESC_ID_INIT;
+			*prev_id = VIRTQUEUE_DESC_ID_INIT;
+			goto retry;
+		} else {
+			END_USE(vq);
+			return -ENOSPC;
+		}
+	}
+
+	i = vq->free_head;
+	flags &= ~VRING_DESC_F_NEXT;
+	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
+	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
+	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
+
+	/* Add the desc to the end of the chain */
+	if (*prev_id != VIRTQUEUE_DESC_ID_INIT) {
+		desc[*prev_id].next = cpu_to_virtio16(_vq->vdev, i);
+		desc[*prev_id].flags |= cpu_to_virtio16(_vq->vdev,
+							VRING_DESC_F_NEXT);
+	}
+	*prev_id = i;
+	if (*head_id == VIRTQUEUE_DESC_ID_INIT)
+		*head_id = *prev_id;
+
+	vq->vq.num_free--;
+	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
+	END_USE(vq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_chain_desc);
+
+/**
  * virtqueue_add_sgs - expose buffers to other end
  * @vq: the struct virtqueue we're talking about.
  * @sgs: array of terminated scatterlists.
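Taken together, virtqueue_add_chain_desc() grows a descriptor chain one
buffer at a time (allocating descs from free_head and linking them via
VRING_DESC_F_NEXT), and virtqueue_add_chain() then attaches the caller's
token to the chain head and kicks the device. A typical caller would
look like the sketch below (hint_two_buffers() and its arguments are
hypothetical, not part of this patch):

/* Illustrative only: chain two guest buffers and publish them on @vq. */
static int hint_two_buffers(struct virtqueue *vq, u64 addr1, u32 len1,
			    u64 addr2, u32 len2, void *token)
{
	unsigned int head = VIRTQUEUE_DESC_ID_INIT;
	unsigned int prev = VIRTQUEUE_DESC_ID_INIT;
	int err;

	/* Each call appends one desc; head/prev track both chain ends. */
	err = virtqueue_add_chain_desc(vq, addr1, len1, &head, &prev, 0);
	if (err)
		return err;
	err = virtqueue_add_chain_desc(vq, addr2, len2, &head, &prev, 0);
	if (err)
		return err;

	/* Publish the whole chain as one request identified by token. */
	return virtqueue_add_chain(vq, head, 0, NULL, token, NULL);
}

tell_host_one_page() above is the degenerate single-desc case of this
pattern: it passes the same id as both head and prev.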
@@ -608,6 +735,57 @@ bool virtqueue_notify(struct virtqueue *_vq)
 EXPORT_SYMBOL_GPL(virtqueue_notify);
 
 /**
+ * virtqueue_kick_async - update after add_buf and sleep till update is done
+ * @vq: the struct virtqueue
+ * @wq: the wait queue to sleep on till the other side is done
+ *
+ * After one or more virtqueue_add_* calls, invoke this to kick
+ * the other side. Sleep till the other side is done with the update.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
+ */
+bool virtqueue_kick_async(struct virtqueue *vq, wait_queue_head_t *wq)
+{
+	u32 len;
+
+	if (likely(virtqueue_kick(vq))) {
+		wait_event(*wq, virtqueue_get_buf(vq, &len));
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_async);
+
+/**
+ * virtqueue_kick_sync - update after add_buf and busy wait till update is done
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_* calls, invoke this to kick
+ * the other side. Busy wait till the other side is done with the update.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
+ */
+bool virtqueue_kick_sync(struct virtqueue *vq)
+{
+	u32 len;
+
+	if (likely(virtqueue_kick(vq))) {
+		while (!virtqueue_get_buf(vq, &len) &&
+		       !virtqueue_is_broken(vq))
+			cpu_relax();
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
+
+/**
  * virtqueue_kick - update after add_buf
  * @vq: the struct virtqueue
  *
diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
new file mode 100644
index 0000000..0bfb646
--- /dev/null
+++ b/include/linux/page_hinting.h
@@ -0,0 +1,16 @@
+#define MAX_FGPT_ENTRIES	1000
+/*
+ * hypervisor_pages - It is a dummy structure passed with the hypercall.
+ * @pfn - page frame number for the page which is to be freed.
+ * @pages - number of pages which are supposed to be freed.
+ * A global array object is used to hold the list of pfn and pages and is
+ * passed as part of the hypercall.
+ */
+struct hypervisor_pages {
+	unsigned long pfn;
+	unsigned int pages;
+};
+
+extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+extern void (*request_hypercall)(void *, int);
+extern void *balloon_ptr;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 28b0e96..bc95e84 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -56,6 +56,25 @@ int virtqueue_add_sgs(struct virtqueue *vq,
 		      unsigned int in_sgs,
 		      void *data,
 		      gfp_t gfp);
 
+/* A desc with this init id is treated as an invalid desc */
+#define VIRTQUEUE_DESC_ID_INIT		UINT_MAX
+int virtqueue_add_chain_desc(struct virtqueue *_vq,
+			     u64 addr,
+			     u32 len,
+			     unsigned int *head_id,
+			     unsigned int *prev_id,
+			     bool in);
+
+int virtqueue_add_chain(struct virtqueue *_vq,
+			unsigned int head,
+			bool indirect,
+			struct vring_desc *indirect_desc,
+			void *data,
+			void *ctx);
+
+bool virtqueue_kick_sync(struct virtqueue *vq);
+
+bool virtqueue_kick_async(struct virtqueue *vq, wait_queue_head_t *wq);
 bool virtqueue_kick(struct virtqueue *vq);
 
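virtqueue_kick_sync() busy-waits with cpu_relax() until the host consumes
the buffer; it is what tell_host_one_page() uses. virtqueue_kick_async()
sleeps on the caller's wait queue instead, so it relies on a wake_up()
from elsewhere, in practice a virtqueue callback registered when the
queue is set up (for example through find_vqs). A sketch of the async
variant follows; hint_wq, hint_vq_done() and hint_kick() are hypothetical
names, not part of this patch:

static DECLARE_WAIT_QUEUE_HEAD(hint_wq);

/* Virtqueue callback: the host used the buffers, let the waiter recheck. */
static void hint_vq_done(struct virtqueue *vq)
{
	wake_up(&hint_wq);
}

/* Sleeping kick for process context; busy-wait fallback otherwise. */
static bool hint_kick(struct virtqueue *vq, bool may_sleep)
{
	if (may_sleep)
		return virtqueue_kick_async(vq, &hint_wq);
	return virtqueue_kick_sync(vq);
}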
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index 090b363..ebc2e8b 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -5,8 +5,8 @@
 #include <linux/sort.h>
 #include <linux/kernel.h>
 #include <trace/events/kvm.h>
+#include <linux/page_hinting.h>
 
-#define MAX_FGPT_ENTRIES	1000
 #define HYPERLIST_THRESHOLD	500
 /*
  * struct kvm_free_pages - Tracks the pages which are freed by the guest.
@@ -21,22 +21,12 @@ struct kvm_free_pages {
 	unsigned int pages;
 };
 
-/*
- * hypervisor_pages - It is a dummy structure passed with the hypercall.
- * @pfn - page frame number for the page which is to be freed.
- * @pages - number of pages which are supposed to be freed.
- * A global array object is used to to hold the list of pfn and pages and is
- * passed as part of the hypercall.
- */
-struct hypervisor_pages {
-	unsigned long pfn;
-	unsigned int pages;
-};
-
 static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
 DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
 DEFINE_PER_CPU(int, kvm_pt_idx);
 struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+void (*request_hypercall)(void *, int);
+void *balloon_ptr;
 
 static void empty_hyperlist(void)
 {
@@ -49,13 +39,14 @@ static void empty_hyperlist(void)
 	}
 }
 
-static void make_hypercall(void)
+static void hyperlist_ready(int entries)
 {
 	/*
 	 * Dummy function: Tobe filled later.
 	 */
-	empty_hyperlist();
 	trace_guest_str_dump("Hypercall to host...:");
+	request_hypercall(balloon_ptr, entries);
+	empty_hyperlist();
 }
 
 static int sort_pfn(const void *a1, const void *b1)
@@ -156,7 +147,7 @@ int compress_hyperlist(void)
 	if (merge_counter != 0)
 		ret = pack_hyperlist() - 1;
 	else
-		ret = MAX_FGPT_ENTRIES - 1;
+		ret = MAX_FGPT_ENTRIES;
 	return ret;
 }
 
@@ -227,16 +218,16 @@ void arch_free_page_slowpath(void)
 			 */
 			if (!prev_free) {
 				hyper_idx++;
-				hypervisor_pagelist[hyper_idx].pfn = pfn;
-				hypervisor_pagelist[hyper_idx].pages = 1;
 				trace_guest_free_page_slowpath(
 					hypervisor_pagelist[hyper_idx].pfn,
 					hypervisor_pagelist[hyper_idx].pages);
+				hypervisor_pagelist[hyper_idx].pfn = pfn;
+				hypervisor_pagelist[hyper_idx].pages = 1;
 				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
 					hyper_idx = compress_hyperlist();
 					if (hyper_idx >= HYPERLIST_THRESHOLD) {
-						make_hypercall();
+						hyperlist_ready(hyper_idx);
 						hyper_idx = 0;
 					}
 				}
@@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order)
 	 * free pages is full and a hypercall will be made. Until complete free
 	 * page list is traversed no further allocaiton will be allowed.
 	 */
+
 	do {
 		seq = read_seqbegin(&guest_page_lock);
 	} while (read_seqretry(&guest_page_lock, seq));
--
2.9.4
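End to end, the control flow wired up by this patch is:
arch_free_page_slowpath() fills hypervisor_pagelist, hyperlist_ready()
calls through the request_hypercall pointer that virtballoon_probe()
pointed at virtballoon_page_hinting() (with balloon_ptr as the opaque
argument), and only then empties the list. The indirection can be
modeled in user space; everything below (hint_entry, driver_hint(), the
array size) is illustrative, not kernel code:

#include <stdio.h>

/* Illustrative stand-ins for the globals this patch wires together. */
struct hint_entry { unsigned long pfn; unsigned int pages; };
static struct hint_entry hyperlist[16];

static void (*request_hypercall)(void *, int);	/* set by the driver */
static void *balloon_ptr;			/* driver-private cookie */

/* Driver side: plays the role of virtballoon_page_hinting(). */
static void driver_hint(void *opaque, int entries)
{
	for (int i = 0; i < entries; i++)
		printf("hint: pfn %lu, %u pages (dev %p)\n",
		       hyperlist[i].pfn, hyperlist[i].pages, opaque);
}

/* Core side: plays the role of hyperlist_ready(). */
static void hyperlist_ready(int entries)
{
	if (request_hypercall)
		request_hypercall(balloon_ptr, entries);
	/* the real code empties the list here for reuse */
}

int main(void)
{
	static int dummy_dev;

	/* Probe time: the balloon driver registers itself. */
	request_hypercall = driver_hint;
	balloon_ptr = &dummy_dev;

	hyperlist[0] = (struct hint_entry){ .pfn = 4096, .pages = 2 };
	hyperlist_ready(1);
	return 0;
}

Registering a function pointer plus an opaque cookie keeps
virt/kvm/page_hinting.c free of any compile-time dependency on the
balloon driver, at the cost of the unchecked (void *) cast at the
registration site in virtballoon_probe().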