Re: [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest

On 05/19/2010 11:34 AM, Sheng Yang wrote:
From: Dexuan Cui <dexuan.cui@xxxxxxxxx>

Enable XSAVE/XRSTORE for guest.

Changes from v1:

1. Use the FPU API.
2. Fix CPUID issue.
3. Save/restore all possible guest xstate fields when switching, because we
don't know which fields the guest has already touched.

Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx>
Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
---
  arch/x86/include/asm/kvm_host.h |    1 +
  arch/x86/include/asm/vmx.h      |    1 +
  arch/x86/kvm/vmx.c              |   28 +++++++++++++
  arch/x86/kvm/x86.c              |   85 +++++++++++++++++++++++++++++++++++---
  4 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d08bb4a..78d7b06 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
  	} update_pte;

  	struct fpu guest_fpu;
+	uint64_t xcr0, host_xcr0;

host_xcr0 can be a global.

  /*
   * Interruption-information format
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 99ae513..2ee8ff6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -36,6 +36,8 @@
  #include <asm/vmx.h>
  #include <asm/virtext.h>
  #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>

  #include "trace.h"

@@ -2616,6 +2618,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
  	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
  	if (enable_ept)
  		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
+	if (cpu_has_xsave)
+		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE;

First, we should only allow the guest to play with cr4.osxsave if guest_has_xsave in cpuid; otherwise we need to #GP if the guest sets it. Second, it may be better to trap when the guest sets it (should be rare); this way, we only need to save/restore xcr0 if the guest has enabled cr4.osxsave.
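
For the second point, a rough sketch of what I mean (untested; keeps the
cpuid check in the common cr4 code, where your x86.c hunk below already
attempts it):

	/* Leave X86_CR4_OSXSAVE out of cr4_guest_owned_bits, so a guest
	 * write that flips it traps; kvm_set_cr4() can then check guest
	 * cpuid and note whether xcr0 switching is needed at all. */
	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
	if (enable_ept)
		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);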

  	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);

  	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
@@ -3354,6 +3358,29 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
  	return 1;
  }

+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+	u64 new_bv = ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) << 32)) |
+		kvm_register_read(vcpu, VCPU_REGS_RAX);

I think you need to trim the upper 32 bits of rax.

Please introduce helpers for reading edx:eax into a u64 and vice versa. We can then use the helpers here and in the msr code.
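
Something along these lines (a sketch; the names are only suggestions),
which also takes care of trimming rax:

static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
	return (u32)kvm_register_read(vcpu, VCPU_REGS_RAX) |
	       ((u64)(u32)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32);
}

static inline void kvm_write_edx_eax(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)val);
	kvm_register_write(vcpu, VCPU_REGS_RDX, (u32)(val >> 32));
}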

+
+	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
+		goto err;
+	if (vmx_get_cpl(vcpu) != 0)
+		goto err;
+	if (!(new_bv & XSTATE_FP) ||
+	     (new_bv & ~vcpu->arch.host_xcr0))
+		goto err;
+	if ((new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE))
+		goto err;

This is a little worrying. What if a new bit is introduced later that depends on other bits? We'll need to add a dependency between ZMM and YMM or whatever, and old versions will be broken.

So I think we need to check xcr0 not against host_xcr0 but against a whitelist of xcr0 bits that we know how to handle (currently fpu, sse, and ymm).
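
I.e. something like (the macro name is just a suggestion):

/* xcr0 bits kvm knows how to save/restore */
#define KVM_SUPPORTED_XCR0	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM)

	if (new_bv & ~KVM_SUPPORTED_XCR0)
		goto err;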

+	vcpu->arch.xcr0 = new_bv;
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+	skip_emulated_instruction(vcpu);
+	return 1;
+err:
+	kvm_inject_gp(vcpu, 0);
+	return 1;
+}
+

@@ -149,6 +150,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
  	{ NULL }
  };

+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
  static void kvm_on_user_return(struct user_return_notifier *urn)
  {
  	unsigned slot;
@@ -473,6 +479,17 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
  }
  EXPORT_SYMBOL_GPL(kvm_lmsw);

+static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	if (best->ecx & bit(X86_FEATURE_XSAVE))

Sanity:  if (best && ...)

+		return true;
+
+	return false;

Can avoid the if (): return best && (best->ecx & ...);

+}
+
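
I.e. with both of the above folded in, the whole helper becomes:

static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
}
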
  int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
  	unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -481,6 +498,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  	if (cr4 & CR4_RESERVED_BITS)
  		return 1;

+	if (!guest_cpuid_has_xsave(vcpu) && X86_CR4_OSXSAVE)

s/&&.*// -- as written the second clause is a no-op: && X86_CR4_OSXSAVE tests a nonzero constant, not cr4, so it is always true. You presumably meant && (cr4 & X86_CR4_OSXSAVE).

+		return 1;
+
  	if (is_long_mode(vcpu)) {
  		if (!(cr4 & X86_CR4_PAE))

  			return 1;

@@ -1887,6 +1902,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
  	unsigned f_lm = 0;
  #endif
  	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
+	unsigned f_xsave = cpu_has_xsave ? F(XSAVE) : 0;

  	/* cpuid 1.edx */
  	const u32 kvm_supported_word0_x86_features =
@@ -1916,7 +1932,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
  		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
  		0 /* Reserved, DCA */ | F(XMM4_1) |
  		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-		0 /* Reserved, XSAVE, OSXSAVE */;
+		0 /* Reserved, AES */ | f_xsave | 0 /* OSXSAVE */;

Enough to put F(XSAVE) there, no? The code should mask it out if not present, like XMM4_2.
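
I.e. just (sketch):

		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved */ | F(XSAVE) | 0 /* OSXSAVE */;

do_cpuid_ent() starts from the host's cpuid output before applying this
mask, so a host without xsave never leaks the bit, exactly as with XMM4_2.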


+static void kvm_update_cpuid(struct kvm_vcpu *vcpu,
+			     struct kvm_cpuid_entry2 *best)
+{
+	/* Update OSXSAVE bit */
+	if (cpu_has_xsave && best->function == 0x1) {
+		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+	}
+}
+
  void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
  {
  	u32 function, index;
@@ -4389,6 +4430,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
  	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
  	best = kvm_find_cpuid_entry(vcpu, function, index);
  	if (best) {
+		kvm_update_cpuid(vcpu, best);

Slightly faster to do it at kvm_set_cr4() time.  Not sure it matters.

  		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
  		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
  		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
@@ -5118,6 +5160,11 @@ void fx_init(struct kvm_vcpu *vcpu)
  	fpu_alloc(&vcpu->arch.guest_fpu);
  	fpu_finit(&vcpu->arch.guest_fpu);

+	if (cpu_has_xsave) {
+		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+		vcpu->arch.xcr0 = vcpu->arch.host_xcr0;

Should be initialized to the default value.
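
The architectural reset value of xcr0 is 1 (just the x87 bit), i.e.:

	vcpu->arch.xcr0 = XSTATE_FP;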

  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
  {
  	if (vcpu->guest_fpu_loaded)
  		return;

  	vcpu->guest_fpu_loaded = 1;
+	if (cpu_has_xsave)
+		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

Why read it every time?

  	unlazy_fpu(current);
+	/* Restore all possible states in the guest */
+	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			cpuid_get_possible_xcr0(vcpu));
  	fpu_restore_checking(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsave)

if guest enabled xsave...

+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);

Need to do it on every entry, not just fpu reload, since xgetbv does not check cr0.ts.
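
Roughly (a sketch; assumes host_xcr0 has become a global as suggested
above), called from vcpu_enter_guest() around the actual entry:

static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
	    vcpu->arch.xcr0 != host_xcr0)
		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
}

static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
	    vcpu->arch.xcr0 != host_xcr0)
		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
}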

Need to add save/restore support for xcrs.

Need to add save/restore support for xsave state.

Please send a test case for this (see qemu-kvm.git user/test/x86 for examples), to be run twice: once with -cpu host,-xsave and once with -cpu host,+xsave.

Things to check:

- Set cr4.xsave without cpuid.xsave -> #GP
- Set cr4.xsave with cpuid.xsave -> works, sets cr4.xsave, sets cpuid.osxsave
- clearing cr4.xsave
- xsetbv/xgetbv/xsave/xrstor with cr4.xsave enabled/disabled
- interdependencies between xcr0 bits (fpu, sse, ymm), illegal combinations, illegal bits, illegal xcrs
- anything else you can think of...
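
To get you started, one of the simpler checks might look roughly like this
(the pass/fail reporting is whatever the harness provides; treat all of it
as a sketch):

/* xgetbv; hand-encoded in case the assembler lacks the mnemonic */
static u64 xgetbv(u32 index)
{
	u32 eax, edx;

	asm volatile(".byte 0x0f,0x01,0xd0"
		     : "=a" (eax), "=d" (edx) : "c" (index));
	return eax | ((u64)edx << 32);
}

static void test_xgetbv(void)
{
	/* assumes the test has already set cr4.osxsave; any valid
	 * xcr0 must have the x87 bit (bit 0) set. */
	u64 xcr0 = xgetbv(0);

	printf("xcr0 = %llx, FP bit %s\n", xcr0,
	       (xcr0 & 1) ? "ok" : "MISSING");
}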


--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

