On 05/20/2010 12:16 PM, Sheng Yang wrote:
From: Dexuan Cui <dexuan.cui@xxxxxxxxx>

Enable XSAVE/XRSTORE for guest.

Change from V2:
Addressed comments from Avi.

Change from V1:
1. Use FPU API.
2. Fix CPUID issue.
3. Save/restore all possible guest xstate fields when switching, because
   we don't know which fields the guest has already touched.

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d08bb4a..3938bd1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
 	} update_pte;
 
 	struct fpu guest_fpu;
+	u64 xcr0;
 
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9e6779f..346ea66 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -266,6 +266,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_VIOLATION	48
 #define EXIT_REASON_EPT_MISCONFIG	49
 #define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
 
 /*
  * Interruption-information format
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 99ae513..a63f206 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -36,6 +36,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -247,6 +249,9 @@ static const u32 vmx_msr_index[] = {
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
+#define MERGE_TO_U64(low, high) \
+	(((low) & -1u) | ((u64)((high) & -1u) << 32))
+
Better as a helper, static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu), in kvm_cache_regs.h, instead of an open-coded macro.
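Something along these lines (just a sketch; kvm_register_read() returns unsigned long, so the low halves need truncating to 32 bits):

static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
	/* build EDX:EAX as a 64-bit value from the register cache */
	return (u64)(u32)kvm_register_read(vcpu, VCPU_REGS_RAX) |
	       ((u64)(u32)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32);
}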
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+	u64 new_bv = MERGE_TO_U64(kvm_register_read(vcpu, VCPU_REGS_RAX),
+				  kvm_register_read(vcpu, VCPU_REGS_RDX));
+
+	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
+		goto err;
+	if (vmx_get_cpl(vcpu) != 0)
+		goto err;
+	if (!(new_bv & XSTATE_FP))
+		goto err;
+	if ((new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE))
+		goto err;
What about a check against unknown bits?
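E.g. something like the following (a sketch; the exact mask to check against is up to you -- host_xcr0 from the x86.c part of the patch would need exporting, or a mask derived from the guest cpuid leaf 0xd could be used):

	/* reject feature bits neither KVM nor the host knows about */
	if (new_bv & ~host_xcr0)
		goto err;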
+	vcpu->arch.xcr0 = new_bv;
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+	skip_emulated_instruction(vcpu);
+	return 1;
+err:
+	kvm_inject_gp(vcpu, 0);
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;

+static u64 host_xcr0;
Should be __read_mostly.
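I.e.:

static u64 __read_mostly host_xcr0;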
+
+static void update_cpuid(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	if (!best)
+		return;
+
+	/* Update OSXSAVE bit */
+	if (cpu_has_xsave && best->function == 0x1) {
+		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+	}
+}
Note: this also needs to run when userspace writes the guest cpuid, not just on CR4 writes.
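A minimal sketch, assuming the existing kvm_vcpu_ioctl_set_cpuid2() path in x86.c is where the guest cpuid gets written:

	/* at the end of kvm_vcpu_ioctl_set_cpuid2(), after the new entries
	 * have been copied in from userspace: */
	update_cpuid(vcpu);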
+
 int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -481,6 +513,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
+	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+		return 1;
+
 	if (is_long_mode(vcpu)) {
 		if (!(cr4 & X86_CR4_PAE))
 			return 1;
@@ -497,6 +532,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if ((cr4 ^ old_cr4) & pdptr_bits)
 		kvm_mmu_reset_context(vcpu);
 
+	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+		update_cpuid(vcpu);
+
I think we need to reload the guest's xcr0 at this point. Alternatively, call vmx_load_host_state() to ensure the next entry will reload it.
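For the first option, roughly (just a sketch):

	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) {
		update_cpuid(vcpu);
		/* pick up the guest's xcr0 if OSXSAVE was just enabled */
		if (cpu_has_xsave && (cr4 & X86_CR4_OSXSAVE))
			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
	}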
@@ -1931,7 +1964,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	switch (function) {
 	case 0:
-		entry->eax = min(entry->eax, (u32)0xb);
+		entry->eax = min(entry->eax, (u32)0xd);
Do we need any special handling for leaf 0xc?
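If it turns out to need anything, it could be as simple as clearing it explicitly in do_cpuid_ent() (a sketch, assuming leaf 0xc stays reserved):

	case 0xc:
		/* reserved leaf: report all zeroes to the guest */
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;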
@@ -4567,6 +4616,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
 		kvm_load_guest_fpu(vcpu);
+	if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
Better done in vmx_save_host_state(), so we only do it on context switches or entries from userspace.
kvm_read_cr4_bits() is faster - doesn't need a vmcs_readl().
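I.e.:

	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);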
 	atomic_set(&vcpu->guest_mode, 1);
 	smp_wmb();
@@ -5118,6 +5169,10 @@ void fx_init(struct kvm_vcpu *vcpu)
 	fpu_alloc(&vcpu->arch.guest_fpu);
 	fpu_finit(&vcpu->arch.guest_fpu);
 
+	/* Ensure guest xcr0 is valid for loading */
+	if (cpu_has_xsave)
+		vcpu->arch.xcr0 = XSTATE_FP;
+
Can do it unconditionally, not that it matters much.
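I.e. just:

	vcpu->arch.xcr0 = XSTATE_FP;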
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->guest_fpu_loaded)
@@ -5134,6 +5197,10 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	vcpu->guest_fpu_loaded = 1;
 	unlazy_fpu(current);
+	/* Restore all possible states in the guest */
+	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			cpuid_get_possible_xcr0(vcpu));
Best to calculate this out of the fast path, when the guest cpuid is set. It also needs to be validated at that point.
The xsetbv can also be avoided entirely if the guest xcr0 equals the host xcr0.
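Something like this, assuming a field (say vcpu->arch.guest_supported_xcr0) computed and validated once when the guest cpuid is set:

	/* host xcr0 is still loaded here, so skip the xsetbv when equal */
	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu) &&
	    vcpu->arch.guest_supported_xcr0 != host_xcr0)
		xsetbv(XCR_XFEATURE_ENABLED_MASK,
		       vcpu->arch.guest_supported_xcr0);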
 	fpu_restore_checking(&vcpu->arch.guest_fpu);
 	trace_kvm_fpu(1);
 }
@@ -5144,7 +5211,14 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 0;
+	/* Save all possible states in the guest */
+	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			cpuid_get_possible_xcr0(vcpu));
Ditto.
 	fpu_save_init(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			host_xcr0);
 	++vcpu->stat.fpu_reload;
 	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 	trace_kvm_fpu(0);
-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.