Subject: [PATCH] KVM: Add hypercall queue for paravirt_ops implementation
Author: Anthony Liguori <aliguori@xxxxxxxxxx>

Implemented a hypercall queue that can be used when paravirt_ops lazy mode
is enabled.  This patch enables queueing of MMU write operations and CR
updates.  This results in about a 50% bump in kernbench performance.

Signed-off-by: Anthony Liguori <aliguori@xxxxxxxxxx>

(An illustrative sketch of how generic code drives this queue through the
lazy-mode hooks follows the diff.)

diff --git a/arch/i386/kernel/kvm.c b/arch/i386/kernel/kvm.c
index 21133e4..52eb50d 100644
--- a/arch/i386/kernel/kvm.c
+++ b/arch/i386/kernel/kvm.c
@@ -34,6 +34,7 @@
 #include <linux/init.h>
 #include <linux/dmi.h>
 #include <linux/acpi_pmtmr.h>
+#include <linux/hardirq.h>
 
 #include "mach_timer.h"
 
@@ -46,8 +47,12 @@ struct kvm_paravirt_state
 	unsigned long cached_cr[5];
 	int cr_valid[5];
 
-	struct kvm_vmca *vmca;
+	enum paravirt_lazy_mode mode;
+	struct kvm_hypercall_entry *queue;
+	int queue_index;
+	int max_queue_index;
+
+	struct kvm_vmca *vmca;
 	void (*hypercall)(void);
 	u64 vmca_gpa;
 
@@ -55,18 +60,18 @@
 
 static DEFINE_PER_CPU(struct kvm_paravirt_state *, paravirt_state);
 
+static int do_hypercall_batching;
 static int do_mmu_write;
 static int do_cr_read_caching;
 static int do_nop_io_delay;
 static int do_paravirt_clock;
 static u64 msr_set_vmca;
 
-static long kvm_hypercall(unsigned int nr, unsigned long p1,
-			  unsigned long p2, unsigned long p3,
-			  unsigned long p4)
+static long _kvm_hypercall(struct kvm_paravirt_state *state,
+			   unsigned int nr, unsigned long p1,
+			   unsigned long p2, unsigned long p3,
+			   unsigned long p4)
 {
-	struct kvm_paravirt_state *state
-		= per_cpu(paravirt_state, smp_processor_id());
 	long ret;
 
 	asm volatile("call *(%6) \n\t"
@@ -83,6 +88,69 @@ static long kvm_hypercall(unsigned int nr, unsigned long p1,
 	return ret;
 }
 
+static int can_defer_hypercall(struct kvm_paravirt_state *state,
+			       unsigned int nr)
+{
+	if (state->mode == PARAVIRT_LAZY_MMU) {
+		switch (nr) {
+		case KVM_HYPERCALL_MMU_WRITE:
+		case KVM_HYPERCALL_FLUSH_TLB:
+		case KVM_HYPERCALL_FLUSH_TLB_SINGLE:
+			return 1;
+		}
+	} else if (state->mode == PARAVIRT_LAZY_CPU) {
+		if (nr == KVM_HYPERCALL_SET_CR)
+			return 1;
+	}
+
+	return 0;
+}
+
+static void hypercall_queue_flush(struct kvm_paravirt_state *state)
+{
+	if (state->queue_index) {
+		_kvm_hypercall(state, KVM_HYPERCALL_FLUSH, __pa(state->queue),
+			       state->queue_index, 0, 0);
+		state->queue_index = 0;
+	}
+}
+
+static void _kvm_hypercall_defer(struct kvm_paravirt_state *state,
+				 unsigned int nr,
+				 unsigned long p1, unsigned long p2,
+				 unsigned long p3, unsigned long p4)
+{
+	struct kvm_hypercall_entry *entry;
+
+	BUG_ON(preemptible());
+
+	if (state->queue_index == state->max_queue_index)
+		hypercall_queue_flush(state);
+
+	entry = &state->queue[state->queue_index++];
+	entry->nr = nr;
+	entry->p1 = p1;
+	entry->p2 = p2;
+	entry->p3 = p3;
+	entry->p4 = p4;
+}
+
+static long kvm_hypercall(unsigned int nr, unsigned long p1,
+			  unsigned long p2, unsigned long p3,
+			  unsigned long p4)
+{
+	struct kvm_paravirt_state *state = get_cpu_var(paravirt_state);
+	long ret = 0;
+
+	if (can_defer_hypercall(state, nr))
+		_kvm_hypercall_defer(state, nr, p1, p2, p3, p4);
+	else
+		ret = _kvm_hypercall(state, nr, p1, p2, p3, p4);
+
+	put_cpu_var(paravirt_state);
+	return ret;
+}
+
 static cycle_t kvm_clocksource_read(void)
 {
 	struct kvm_paravirt_state *state = get_cpu_var(paravirt_state);
@@ -146,18 +214,22 @@ static __always_inline void kvm_write_cr(int reg, unsigned long value)
 	state->cr_valid[reg] = 1;
 	state->cached_cr[reg] = value;
 
-	switch (reg) {
-	case 0:
-		native_write_cr0(value);
-		break;
-	case 3:
-		native_write_cr3(value);
-		break;
-	case 4:
-		native_write_cr4(value);
-		break;
-	default:
-		BUG();
+	if (state->mode == PARAVIRT_LAZY_CPU)
+		kvm_hypercall(KVM_HYPERCALL_SET_CR, reg, value, 0, 0);
+	else {
+		switch (reg) {
+		case 0:
+			native_write_cr0(value);
+			break;
+		case 3:
+			native_write_cr3(value);
+			break;
+		case 4:
+			native_write_cr4(value);
+			break;
+		default:
+			BUG();
+		}
 	}
 }
 
@@ -269,7 +341,24 @@ static void kvm_flush_tlb(void)
 
 static void kvm_flush_tlb_single(unsigned long addr)
 {
-	kvm_hypercall(KVM_HYPERCALL_FLUSH_TLB_SINGLE, addr, 0, 0, 0);
+	int mode = x86_read_percpu(paravirt_state)->mode;
+
+	if (mode == PARAVIRT_LAZY_MMU)
+		kvm_hypercall(KVM_HYPERCALL_FLUSH_TLB_SINGLE, addr, 0, 0, 0);
+	else
+		__native_flush_tlb_single(addr);
+}
+
+static void kvm_set_lazy_mode(enum paravirt_lazy_mode mode)
+{
+	struct kvm_paravirt_state *state
+		= per_cpu(paravirt_state, smp_processor_id());
+
+	if (mode == PARAVIRT_LAZY_FLUSH || mode == PARAVIRT_LAZY_NONE)
+		hypercall_queue_flush(state);
+
+	if (mode != PARAVIRT_LAZY_FLUSH)
+		state->mode = mode;
 }
 
 static void kvm_release_pt(u32 pfn)
@@ -320,6 +409,9 @@ static void paravirt_ops_setup(void)
 		paravirt_ops.release_pd = kvm_release_pt;
 	}
 
+	if (do_hypercall_batching)
+		paravirt_ops.set_lazy_mode = kvm_set_lazy_mode;
+
 	paravirt_ops.paravirt_enabled = 1;
 }
 
@@ -365,6 +457,9 @@ static int paravirt_initialize(void)
 	if ((edx & KVM_FEATURE_MMU_WRITE))
 		do_mmu_write = 1;
 
+	if ((edx & KVM_FEATURE_HYPERCALL_BATCHING))
+		do_hypercall_batching = 1;
+
 	on_each_cpu(paravirt_activate, NULL, 0, 1);
 
 	return 0;
@@ -375,6 +470,9 @@ static __init void paravirt_free_state(struct kvm_paravirt_state *state)
 	if (!state)
 		return;
 
+	if (state->queue)
+		__free_page(pfn_to_page(__pa(state->queue) >> PAGE_SHIFT));
+
 	if (state->hypercall)
 		__free_page(pfn_to_page(__pa(state->hypercall) >> PAGE_SHIFT));
 
@@ -401,8 +499,15 @@ static __init struct kvm_paravirt_state *paravirt_alloc_state(void)
 	if (!state->hypercall)
 		goto err;
 
+	state->queue = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!state->queue)
+		goto err;
+
 	state->vmca_gpa = __pa(state->vmca);
 	state->vmca->hypercall_gpa = __pa(state->hypercall);
+	state->queue_index = 0;
+	state->max_queue_index
+		= (PAGE_SIZE / sizeof(struct kvm_hypercall_entry));
 
 	return state;
 
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 9a7462a..7e53374 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -291,6 +291,7 @@ struct kvm_vcpu {
 	gpa_t para_state_gpa;
 	struct page *para_state_page;
 	gpa_t hypercall_gpa;
+	struct page *queue_page;
 	unsigned long cr4;
 	unsigned long cr8;
 	u64 pdptrs[4]; /* pae */
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 91aec56..e3afbde 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -96,7 +96,7 @@ struct vfsmount *kvmfs_mnt;
 #define KVM_PARAVIRT_FEATURES \
 	(KVM_FEATURE_VMCA | KVM_FEATURE_NOP_IO_DELAY | \
 	 KVM_FEATURE_PARAVIRT_CLOCK | KVM_FEATURE_CR_READ_CACHE | \
-	 KVM_FEATURE_MMU_WRITE)
+	 KVM_FEATURE_MMU_WRITE | KVM_FEATURE_HYPERCALL_BATCHING)
 
 #define KVM_MSR_SET_VMCA	0x87655678
 
@@ -1410,6 +1410,24 @@ static int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
 	return 0;
 }
 
+static int kvm_hypercall_set_cr(struct kvm_vcpu *vcpu,
+				u32 reg, unsigned long value)
+{
+	switch (reg) {
+	case 0:
+		set_cr0(vcpu, value);
+		break;
+	case 3:
+		set_cr3(vcpu, value);
+		break;
+	case 4:
+		set_cr4(vcpu, value);
+		break;
+	}
+
+	return 0;
+}
+
 static int dispatch_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
 			      unsigned long p1, unsigned long p2,
 			      unsigned long p3, unsigned long p4)
@@ -1419,6 +1437,8 @@ static int dispatch_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
 		return kvm_hypercall_mmu_write(vcpu, p1, p2, p3, p4);
 	case KVM_HYPERCALL_UPDATE_TIME:
 		return kvm_hypercall_update_time(vcpu);
+	case KVM_HYPERCALL_SET_CR:
+		return kvm_hypercall_set_cr(vcpu, p1, p2);
 	case KVM_HYPERCALL_FLUSH_TLB:
 		return kvm_hypercall_flush_tlb(vcpu);
 	case KVM_HYPERCALL_FLUSH_TLB_SINGLE:
@@ -1429,6 +1449,39 @@ static int dispatch_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
 	return -KVM_ENOSYS;
 }
 
+static int kvm_hypercall_flush(struct kvm_vcpu *vcpu, gva_t addr, u32 nb_queue)
+{
+	struct kvm_hypercall_entry *queue;
+	struct page *queue_page;
+	hpa_t queue_hpa;
+	int ret = 0;
+	int i;
+
+	if (nb_queue > (PAGE_SIZE / sizeof(struct kvm_hypercall_entry)))
+		return -KVM_EFAULT;
+
+	queue_hpa = gpa_to_hpa(vcpu, addr);
+	if (is_error_hpa(queue_hpa))
+		return -KVM_EFAULT;
+
+	queue_page = pfn_to_page(queue_hpa >> PAGE_SHIFT);
+	queue = kmap(queue_page);
+
+	for (i = 0; i < nb_queue; i++)
+		ret |= dispatch_hypercall(vcpu, queue[i].nr, queue[i].p1,
+					  queue[i].p2, queue[i].p3,
+					  queue[i].p4);
+
+	if (ret < 0)
+		ret = -KVM_EINVAL;
+	else
+		ret = 0;
+
+	kunmap(queue_page);
+
+	return ret;
+}
+
 int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
@@ -1456,7 +1509,11 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
 	}
 
-	ret = dispatch_hypercall(vcpu, nr, a0, a1, a2, a3);
+	if (nr == KVM_HYPERCALL_FLUSH)
+		ret = kvm_hypercall_flush(vcpu, a0, a1);
+	else
+		ret = dispatch_hypercall(vcpu, nr, a0, a1, a2, a3);
+
 	if (ret == -KVM_ENOSYS) {
 		run->hypercall.args[0] = a0;
 		run->hypercall.args[1] = a1;
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index 560de6a..e220832 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -27,6 +27,7 @@ static int __init kvm_guest_init(void)
 #define KVM_FEATURE_PARAVIRT_CLOCK	(1UL << 2)
 #define KVM_FEATURE_CR_READ_CACHE	(1UL << 3)
 #define KVM_FEATURE_MMU_WRITE		(1UL << 4)
+#define KVM_FEATURE_HYPERCALL_BATCHING	(1UL << 5)
 
 struct kvm_vmca
 {
@@ -34,6 +35,15 @@ struct kvm_vmca
 	u64 real_nsecs;
 };
 
+struct kvm_hypercall_entry
+{
+	unsigned long nr;
+	unsigned long p1;
+	unsigned long p2;
+	unsigned long p3;
+	unsigned long p4;
+};
+
 /*
  * Hypercall calling convention:
  *
@@ -55,5 +65,7 @@ struct kvm_vmca
 #define KVM_HYPERCALL_FLUSH_TLB		2
 #define KVM_HYPERCALL_FLUSH_TLB_SINGLE	3
 #define KVM_HYPERCALL_RELEASE_PT	4
+#define KVM_HYPERCALL_SET_CR		5
+#define KVM_HYPERCALL_FLUSH		6
 
 #endif
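For reviewers unfamiliar with the lazy-mode hooks, here is a rough sketch
(illustrative only, not part of the patch) of how generic mm code ends up
driving the queue once paravirt_ops.set_lazy_mode points at
kvm_set_lazy_mode().  example_set_ptes() is a made-up caller, and it
assumes set_pte() is already routed to the KVM_HYPERCALL_MMU_WRITE path as
in the earlier patches of this series; arch_enter_lazy_mmu_mode() and
arch_leave_lazy_mmu_mode() are the existing generic hooks, which on i386
expand to set_lazy_mode(PARAVIRT_LAZY_MMU) and
set_lazy_mode(PARAVIRT_LAZY_NONE) respectively.

	/* Hypothetical caller batching a run of PTE updates. */
	static void example_set_ptes(pte_t *ptep, pte_t pte, int n)
	{
		int i;

		/* kvm_set_lazy_mode(PARAVIRT_LAZY_MMU): from here on,
		 * can_defer_hypercall() returns 1 for MMU writes, so
		 * each set_pte() is queued instead of exiting. */
		arch_enter_lazy_mmu_mode();

		for (i = 0; i < n; i++)
			set_pte(ptep + i, pte);

		/* kvm_set_lazy_mode(PARAVIRT_LAZY_NONE) flushes the
		 * queue: one KVM_HYPERCALL_FLUSH replaces n exits, and
		 * the host replays the queue page entry by entry in
		 * kvm_hypercall_flush() without re-entering the guest. */
		arch_leave_lazy_mmu_mode();
	}

The queue also flushes itself when full (_kvm_hypercall_defer() calls
hypercall_queue_flush() once queue_index reaches max_queue_index), so a
batch larger than one page of entries stays correct, just split across
several flush hypercalls.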