[PATCH 1/1] KVM: X86: add support for XSAVE/XRSTOR in the guest

When the host enables XSAVE/XRSTOR, this patch exposes the XSAVE/XRSTOR
related CPUID leaves to the guest by fixing up kvm_emulate_cpuid(), and
allows the guest to set CR4.OSXSAVE to enable XSAVE.

The patch also adds per-vcpu host/guest xstate images and masks, and
extends the current FXSAVE/FXRSTOR-based host/guest state switching to
use XSAVE/XRSTOR for the full xstate (FPU/SSE/YMM) when available.
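
For reference, the guest-side enabling sequence that exercises the new
paths looks roughly like the sketch below (illustrative only, not part
of this patch; it assumes the usual cpuid_count()/read_cr4()/write_cr4()
guest helpers and an assembler that knows the xsetbv mnemonic):

	static void guest_enable_xsave(void)
	{
		u32 eax, ebx, ecx, edx;
		u64 xcr0;

		/* CPUID.1:ECX.XSAVE[bit 26] must be set before CR4.OSXSAVE. */
		cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
		if (!(ecx & (1U << 26)))
			return;

		/* With this patch CR4.OSXSAVE can be guest-owned on VMX. */
		write_cr4(read_cr4() | X86_CR4_OSXSAVE);

		/* CPUID.0xD.0:EDX:EAX reports the XCR0 bits the VMM supports. */
		cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
		xcr0 = ((u64)edx << 32) | eax;

		/* XSETBV exits to the handle_xsetbv() added in vmx.c below. */
		asm volatile("xsetbv" : :
			     "a" ((u32)xcr0), "d" ((u32)(xcr0 >> 32)), "c" (0));
	}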

Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |   15 +--
 arch/x86/include/asm/vmx.h      |    1 +
 arch/x86/include/asm/xsave.h    |    3 +
 arch/x86/kvm/vmx.c              |   24 +++++
 arch/x86/kvm/x86.c              |  217 +++++++++++++++++++++++++++++++++++++--
 5 files changed, 242 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3f0007b..60be1a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -303,6 +303,11 @@ struct kvm_vcpu_arch {
 	struct i387_fxsave_struct host_fx_image;
 	struct i387_fxsave_struct guest_fx_image;
 
+	struct xsave_struct *host_xstate_image;
+	struct xsave_struct *guest_xstate_image;
+	uint64_t host_xstate_mask;
+	uint64_t guest_xstate_mask;
+
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
@@ -718,16 +723,6 @@ static inline unsigned long read_msr(unsigned long msr)
 }
 #endif
 
-static inline void kvm_fx_save(struct i387_fxsave_struct *image)
-{
-	asm("fxsave (%0)":: "r" (image));
-}
-
-static inline void kvm_fx_restore(struct i387_fxsave_struct *image)
-{
-	asm("fxrstor (%0)":: "r" (image));
-}
-
 static inline void kvm_fx_finit(void)
 {
 	asm("finit");
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fb9a080..842286b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -260,6 +260,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
 
 /*
  * Interruption-information format
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index ddc04cc..ada81a2 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -13,6 +13,9 @@
 
 #define FXSAVE_SIZE	512
 
+#define XSTATE_YMM_SIZE 256
+#define XSTATE_YMM_OFFSET (512 + 64)
+
 /*
  * These are the features that the OS can handle currently.
  */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 875b785..a72d024 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -35,6 +35,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -2517,6 +2519,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
 	if (enable_ept)
 		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
+	if (cpu_has_xsave)
+		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE;
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 
 	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
@@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+	u64 new_bv = (((u64)kvm_register_read(vcpu, VCPU_REGS_RDX)) << 32) |
+		kvm_register_read(vcpu, VCPU_REGS_RAX);
+	u64 host_bv = vcpu->arch.host_xstate_mask;
+
+	if (((new_bv ^ host_bv) & ~host_bv) || !(new_bv & 1))
+		goto err;
+	if ((host_bv & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE))
+		goto err;
+	vcpu->arch.guest_xstate_mask = new_bv;
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+	skip_emulated_instruction(vcpu);
+	return 1;
+err:
+	kvm_inject_gp(vcpu, 0);
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
@@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
+	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b2ce1d..2af3fbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -52,6 +52,8 @@
 #include <asm/desc.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS						\
@@ -62,6 +64,7 @@
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
+			  | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0)	\
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -4017,6 +4020,36 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
 
+static struct kmem_cache *kvm_xstate_cachep;
+static unsigned int kvm_xstate_size;
+
+static int kvm_alloc_xstate_cachep(void)
+{
+	u32 eax, ebx, ecx, edx;
+
+	if (!cpu_has_xsave)
+		return 0;
+
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+	kvm_xstate_size = ebx;
+	kvm_xstate_cachep =
+		kmem_cache_create("kvm_vcpu_xstate", kvm_xstate_size,
+			__alignof__(union thread_xstate), 0, NULL);
+	if (!kvm_xstate_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void kvm_free_xstate_cachep(void)
+{
+	if (!kvm_xstate_cachep)
+		return;
+
+	kmem_cache_destroy(kvm_xstate_cachep);
+	kvm_xstate_cachep = NULL;
+}
+
 int kvm_arch_init(void *opaque)
 {
 	int r;
@@ -4039,6 +4072,10 @@ int kvm_arch_init(void *opaque)
 		goto out;
 	}
 
+	r = kvm_alloc_xstate_cachep();
+	if (r)
+		goto out;
+
 	r = kvm_mmu_module_init();
 	if (r)
 		goto out;
@@ -4058,6 +4095,7 @@ int kvm_arch_init(void *opaque)
 	return 0;
 
 out:
+	kvm_free_xstate_cachep();
 	return r;
 }
 
@@ -4070,6 +4108,7 @@ void kvm_arch_exit(void)
 					    CPUFREQ_TRANSITION_NOTIFIER);
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
+	kvm_free_xstate_cachep();
 }
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -4307,6 +4346,65 @@ not_found:
 	return 36;
 }
 
+#define bitmaskof(idx)  (1U << ((idx) & 31))
+static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx)
+{
+	u32 eax, ebx, ecx, edx;
+
+	if (func != 0 && func != 1 && func != 0xd)
+		return;
+
+	eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+	ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+
+	switch (func) {
+	case 0:
+		/* fixup the Maximum Input Value */
+		if (cpu_has_xsave && eax < 0xd)
+			eax = 0xd;
+		break;
+	case 1:
+		ecx &= ~(bitmaskof(X86_FEATURE_XSAVE) |
+			bitmaskof(X86_FEATURE_OSXSAVE));
+		if (!cpu_has_xsave)
+			break;
+		ecx |= bitmaskof(X86_FEATURE_XSAVE);
+		if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+			ecx |= bitmaskof(X86_FEATURE_OSXSAVE);
+		break;
+	case 0xd:
+		eax = ebx = ecx = edx = 0;
+		if (!cpu_has_xsave)
+			break;
+		switch (idx) {
+		case 0:
+			eax = vcpu->arch.host_xstate_mask & XCNTXT_MASK;
+			/* FP/SSE + XSAVE.HEADER + YMM. */
+			ecx = 512 + 64;
+			if (eax & XSTATE_YMM)
+				ecx += XSTATE_YMM_SIZE;
+			ebx = ecx;
+			break;
+		case 2:
+			if (!(vcpu->arch.host_xstate_mask & XSTATE_YMM))
+				break;
+			eax = XSTATE_YMM_SIZE;
+			ebx = XSTATE_YMM_OFFSET;
+			break;
+		default:
+			break;
+		}
+		break;
+	}
+
+	kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;
@@ -4325,6 +4423,9 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
 		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
 	}
+
+	kvm_emulate_cpuid_fixup(vcpu, function, index);
+
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 	trace_kvm_cpuid(function,
 			kvm_register_read(vcpu, VCPU_REGS_RAX),
@@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
+#define REX_PREFIX  "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu)
+{
+	if (cpu_has_xsave) {
+		asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+		: : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)
+		: "memory");
+		vcpu->arch.host_xstate_mask =
+			xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	} else {
+		asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image));
+	}
+}
+
+static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu)
+{
+	if (cpu_has_xsave) {
+		asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+		: : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)
+		: "memory");
+		vcpu->arch.guest_xstate_mask =
+			xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	} else {
+		asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+	}
+}
+
+static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu)
+{
+	if (cpu_has_xsave) {
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask);
+		asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+		: : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image));
+	} else {
+		asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image));
+	}
+}
+
+static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu)
+{
+	if (cpu_has_xsave) {
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+		asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+		: : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image));
+	} else {
+		asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+	}
+}
+
 void fx_init(struct kvm_vcpu *vcpu)
 {
 	unsigned after_mxcsr_mask;
@@ -5102,17 +5257,21 @@ void fx_init(struct kvm_vcpu *vcpu)
 	 * allocate ram with GFP_KERNEL.
 	 */
 	if (!used_math())
-		kvm_fx_save(&vcpu->arch.host_fx_image);
+		kvm_fx_save_host(vcpu);
 
 	/* Initialize guest FPU by resetting ours and saving into guest's */
 	preempt_disable();
-	kvm_fx_save(&vcpu->arch.host_fx_image);
+	kvm_fx_save_host(vcpu);
 	kvm_fx_finit();
-	kvm_fx_save(&vcpu->arch.guest_fx_image);
-	kvm_fx_restore(&vcpu->arch.host_fx_image);
+	kvm_fx_save_guest(vcpu);
+	kvm_fx_restore_host(vcpu);
 	preempt_enable();
 
 	vcpu->arch.cr0 |= X86_CR0_ET;
+
+	if (cpu_has_xsave)
+		return;
+
 	after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
 	vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
 	memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
@@ -5126,8 +5285,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 1;
-	kvm_fx_save(&vcpu->arch.host_fx_image);
-	kvm_fx_restore(&vcpu->arch.guest_fx_image);
+	kvm_fx_save_host(vcpu);
+	kvm_fx_restore_guest(vcpu);
 	trace_kvm_fpu(1);
 }
 
@@ -5137,13 +5296,50 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 0;
-	kvm_fx_save(&vcpu->arch.guest_fx_image);
-	kvm_fx_restore(&vcpu->arch.host_fx_image);
+	kvm_fx_save_guest(vcpu);
+	kvm_fx_restore_host(vcpu);
 	++vcpu->stat.fpu_reload;
 	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 	trace_kvm_fpu(0);
 }
 
+static void kvm_arch_vcpu_destroy_xstate_image(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.guest_xstate_image)
+		kmem_cache_free(kvm_xstate_cachep,
+			vcpu->arch.guest_xstate_image);
+	if (vcpu->arch.host_xstate_image)
+		kmem_cache_free(kvm_xstate_cachep,
+			vcpu->arch.host_xstate_image);
+	vcpu->arch.guest_xstate_image = NULL;
+	vcpu->arch.host_xstate_image = NULL;
+}
+
+static int kvm_arch_vcpu_create_xstate_image(struct kvm_vcpu *vcpu)
+{
+	if (!cpu_has_xsave)
+		return 0;
+
+	if (!vcpu->arch.guest_xstate_image) {
+		vcpu->arch.guest_xstate_image =
+			kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL);
+		if (!vcpu->arch.guest_xstate_image)
+			goto err;
+	}
+	if (!vcpu->arch.host_xstate_image) {
+		vcpu->arch.host_xstate_image =
+			kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL);
+		if (!vcpu->arch.host_xstate_image)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	kvm_arch_vcpu_destroy_xstate_image(vcpu);
+	return -ENOMEM;
+}
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.time_page) {
@@ -5152,6 +5348,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	}
 
 	kvm_x86_ops->vcpu_free(vcpu);
+	kvm_arch_vcpu_destroy_xstate_image(vcpu);
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -5189,6 +5386,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	vcpu_put(vcpu);
 
 	kvm_x86_ops->vcpu_free(vcpu);
+	kvm_arch_vcpu_destroy_xstate_image(vcpu);
 }
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -5201,6 +5399,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.dr6 = DR6_FIXED_1;
 	vcpu->arch.dr7 = DR7_FIXED_1;
 
+	if (kvm_arch_vcpu_create_xstate_image(vcpu) < 0)
+		return -ENOMEM;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-- 
1.6.0
