On 04/29/2010 08:22 AM, Dexuan Cui wrote:
When the host enables XSAVE/XRSTOR, the patch exposes the XSAVE/XRSTOR
related CPUID leaves to the guest by fixing up kvm_emulate_cpuid(), and it
allows the guest to set CR4.OSXSAVE to enable XSAVE.
The patch adds per-vcpu host/guest xstate images and masks, and enhances
the current FXSAVE/FXRSTOR based host xstate (FPU/SSE/YMM) switch with the
new XSAVE/XRSTOR.
5 files changed, 242 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3f0007b..60be1a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -303,6 +303,11 @@ struct kvm_vcpu_arch {
struct i387_fxsave_struct host_fx_image;
struct i387_fxsave_struct guest_fx_image;
+ struct xsave_struct *host_xstate_image;
+ struct xsave_struct *guest_xstate_image;
+ uint64_t host_xstate_mask;
Does host_xstate_mask need to be per-vcpu, or can it be global?
+ uint64_t guest_xstate_mask;
Can be called xcr0, like other shadow registers.
+
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
@@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+ u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX)) |
+ kvm_register_read(vcpu, VCPU_REGS_RAX);
Missing shift?
Probably worthwhile to create a helper for reading/writing edx:eax into
a u64.
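Something like the following, maybe (the name is made up, no such helper
exists yet):

/* Illustration only: read the edx:eax pair as a single u64. */
static u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
	return ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
		(u32)kvm_register_read(vcpu, VCPU_REGS_RAX);
}

(plus a matching writer, since both directions come up.)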
+ u64 host_bv = vcpu->arch.host_xstate_mask;
What about ecx?
+
+ if (((new_bv ^ host_bv) & ~host_bv)
Isn't (new_bv & ~host_bv) equivalent? (guest cannot exceed host...)
|| !(new_bv & 1))
Symbolic value or comment.
+ goto err;
+ if ((host_bv & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE))
host_bv unneeded, I think.
+ goto err;
+ vcpu->arch.guest_xstate_mask = new_bv;
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
Can't we run with the host xcr0? Isn't it guaranteed to be a superset
of the guest xcr0?
+ skip_emulated_instruction(vcpu);
+ return 1;
+err:
+ kvm_inject_gp(vcpu, 0);
Need to #UD in some circumstances.
+ return 1;
+}
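Putting those points together -- ecx, the simplified mask check, a symbolic
name for bit 0, and the #UD case -- the handler could look roughly like
this. A sketch only; guest_cpuid_has_xsave() is a made-up name for "does
the guest's cpuid advertise XSAVE":

static int handle_xsetbv(struct kvm_vcpu *vcpu)
{
	u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
		     (u32)kvm_register_read(vcpu, VCPU_REGS_RAX);
	u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);

	/* XSETBV #UDs if the guest can't see XSAVE or hasn't set CR4.OSXSAVE. */
	if (!guest_cpuid_has_xsave(vcpu) ||
	    !(kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}
	/* Only XCR0 exists; any other index gets #GP(0). */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		goto err;
	/* No state the host lacks, x87 always enabled, no YMM without SSE. */
	if ((new_bv & ~vcpu->arch.host_xstate_mask) ||
	    !(new_bv & XSTATE_FP) ||
	    ((new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE)))
		goto err;

	vcpu->arch.guest_xstate_mask = new_bv;
	xsetbv(XCR_XFEATURE_ENABLED_MASK, new_bv);
	skip_emulated_instruction(vcpu);
	return 1;
err:
	kvm_inject_gp(vcpu, 0);
	return 1;
}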
+
static int handle_apic_access(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
@@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_WBINVD] = handle_wbinvd,
+ [EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b2ce1d..2af3fbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -52,6 +52,8 @@
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -62,6 +64,7 @@
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0) \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
It also depends on the guest cpuid value. Please add it outside the
macro; it's confusing to read something that looks like a constant but
isn't.
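For the guest-cpuid part, a fragment along these lines in kvm_set_cr4()
is what I have in mind (guest_cpuid_has_xsave() is again a made-up name):

	/* Sketch for kvm_set_cr4(): CR4.OSXSAVE is only valid if the guest's
	 * cpuid actually advertises XSAVE (made-up helper). */
	if ((cr4 & X86_CR4_OSXSAVE) && !guest_cpuid_has_xsave(vcpu)) {
		kvm_inject_gp(vcpu, 0);
		return;
	}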
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -4307,6 +4346,65 @@ not_found:
return 36;
}
+#define bitmaskof(idx) (1U << ((idx) & 31))
+static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (func != 0 && func != 1 && func != 0xd)
+ return;
+
+ eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+ ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+
+ switch (func) {
+ case 0:
+ /* fixup the Maximum Input Value */
+ if (cpu_has_xsave && eax < 0xd)
+ eax = 0xd;
+ break;
+ case 1:
+ ecx &= ~(bitmaskof(X86_FEATURE_XSAVE) |
+ bitmaskof(X86_FEATURE_OSXSAVE));
+ if (!cpu_has_xsave)
+ break;
+ ecx |= bitmaskof(X86_FEATURE_XSAVE);
+ if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+ ecx |= bitmaskof(X86_FEATURE_OSXSAVE);
+ break;
+ case 0xd:
+ eax = ebx = ecx = edx = 0;
+ if (!cpu_has_xsave)
+ break;
+ switch (idx) {
+ case 0:
+ eax = vcpu->arch.host_xstate_mask & XCNTXT_MASK;
+ /* FP/SSE + XSAVE.HEADER + YMM. */
+ ecx = 512 + 64;
+ if (eax & XSTATE_YMM)
+ ecx += XSTATE_YMM_SIZE;
+ ebx = ecx;
+ break;
+ case 2:
+ if (!(vcpu->arch.host_xstate_mask & XSTATE_YMM))
+ break;
+ eax = XSTATE_YMM_SIZE;
+ ebx = XSTATE_YMM_OFFSET;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
+ kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+}
This should be part of KVM_GET_SUPPORTED_CPUID.

@@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
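To expand on the KVM_GET_SUPPORTED_CPUID point above: the 0xd data only
depends on the host, so it can be computed once when the cpuid entry table
is built instead of being fixed up on every CPUID exit. Very roughly (the
helper and its call site are illustrative only):

/* Sketch: fill a 0xd subleaf while building the supported-cpuid entries;
 * host_xcr0 would be read once at hardware setup time. */
static void fill_xsave_leaf(struct kvm_cpuid_entry2 *entry, u32 index,
			    u64 host_xcr0)
{
	entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
	if (!cpu_has_xsave)
		return;

	switch (index) {
	case 0:
		entry->eax = host_xcr0 & XCNTXT_MASK;
		entry->ecx = 512 + 64;		/* FP/SSE + XSAVE header */
		if (entry->eax & XSTATE_YMM)
			entry->ecx += XSTATE_YMM_SIZE;
		entry->ebx = entry->ecx;
		break;
	case 2:
		if (host_xcr0 & XSTATE_YMM) {
			entry->eax = XSTATE_YMM_SIZE;
			entry->ebx = XSTATE_YMM_OFFSET;
		}
		break;
	}
}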
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)
+ : "memory");
+ vcpu->arch.host_xstate_mask =
+ xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ } else {
+ asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image));
+ }
+}
+
+static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)
+ : "memory");
+ vcpu->arch.guest_xstate_mask =
+ xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ } else {
+ asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+ }
+}
+
+static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask);
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image));
+ } else {
+ asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image));
+ }
+}
+
+static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image));
+ } else {
+ asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+ }
+}
+
This mostly duplicates the standard x86 fpu code. I have a patch
somewhere that abstracts it out; I'll dig it up and send it out.
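Independently of sharing with the i387 code, the four copies above could
also collapse into one save/restore pair -- a sketch of the shape, not the
patch I'm referring to:

/* Sketch only: one pair of helpers, parameterized by the images to use. */
static void fx_save(struct i387_fxsave_struct *fx,
		    struct xsave_struct *xs, u64 *xstate_mask)
{
	if (cpu_has_xsave) {
		/* XSAVE with a full save mask in edx:eax */
		asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
			      : : "a" (-1), "d" (-1), "D" (xs) : "memory");
		*xstate_mask = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	} else {
		asm volatile ("fxsave (%0)" : : "r" (fx) : "memory");
	}
}

static void fx_restore(struct i387_fxsave_struct *fx,
			struct xsave_struct *xs, u64 xstate_mask)
{
	if (cpu_has_xsave) {
		xsetbv(XCR_XFEATURE_ENABLED_MASK, xstate_mask);
		asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
			      : : "a" (-1), "d" (-1), "D" (xs));
	} else {
		asm volatile ("fxrstor (%0)" : : "r" (fx));
	}
}

kvm_fx_save_host() then becomes fx_save(&vcpu->arch.host_fx_image,
vcpu->arch.host_xstate_image, &vcpu->arch.host_xstate_mask), and similarly
for the other three.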
--
error compiling committee.c: too many arguments to function