On Thursday 20 May 2010 17:46:40 Avi Kivity wrote: > On 05/20/2010 12:16 PM, Sheng Yang wrote: > > From: Dexuan Cui<dexuan.cui@xxxxxxxxx> > > > > Enable XSAVE/XRSTORE for guest. > > > > Change from V2: > > Addressed comments from Avi. > > > > Change from V1: > > > > 1. Use FPU API. > > 2. Fix CPUID issue. > > 3. Save/restore all possible guest xstate fields when switching. Because > > we don't know which fields guest has already touched. > > > > > > diff --git a/arch/x86/include/asm/kvm_host.h > > b/arch/x86/include/asm/kvm_host.h index d08bb4a..3938bd1 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -302,6 +302,7 @@ struct kvm_vcpu_arch { > > > > } update_pte; > > > > struct fpu guest_fpu; > > > > + u64 xcr0; > > > > gva_t mmio_fault_cr2; > > struct kvm_pio_request pio; > > > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > > index 9e6779f..346ea66 100644 > > --- a/arch/x86/include/asm/vmx.h > > +++ b/arch/x86/include/asm/vmx.h > > @@ -266,6 +266,7 @@ enum vmcs_field { > > > > #define EXIT_REASON_EPT_VIOLATION 48 > > #define EXIT_REASON_EPT_MISCONFIG 49 > > #define EXIT_REASON_WBINVD 54 > > > > +#define EXIT_REASON_XSETBV 55 > > > > /* > > > > * Interruption-information format > > > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index 99ae513..a63f206 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -36,6 +36,8 @@ > > > > #include<asm/vmx.h> > > #include<asm/virtext.h> > > #include<asm/mce.h> > > > > +#include<asm/i387.h> > > +#include<asm/xcr.h> > > > > #include "trace.h" > > > > @@ -247,6 +249,9 @@ static const u32 vmx_msr_index[] = { > > > > }; > > #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) > > > > +#define MERGE_TO_U64(low, high) \ > > + (((low)& -1u) | ((u64)((high)& -1u)<< 32)) > > + > > static inline u64 kvm_read_edx_eax(vcpu) in cache_regs.h > > > +static int handle_xsetbv(struct kvm_vcpu *vcpu) > > +{ > > + u64 new_bv = MERGE_TO_U64(kvm_register_read(vcpu, VCPU_REGS_RAX), > > + kvm_register_read(vcpu, VCPU_REGS_RDX)); > > + > > + if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0) > > + goto err; > > + if (vmx_get_cpl(vcpu) != 0) > > + goto err; > > + if (!(new_bv& XSTATE_FP)) > > + goto err; > > + if ((new_bv& XSTATE_YMM)&& !(new_bv& XSTATE_SSE)) > > + goto err; > > What about a check against unknown bits? > > > + vcpu->arch.xcr0 = new_bv; > > + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); > > + skip_emulated_instruction(vcpu); > > + return 1; > > +err: > > + kvm_inject_gp(vcpu, 0); > > + return 1; > > +} > > + > > > > static int handle_apic_access(struct kvm_vcpu *vcpu) > > { > > > > return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; > > > > +static u64 host_xcr0; > > __read_mostly. > > > + > > +static void update_cpuid(struct kvm_vcpu *vcpu) > > +{ > > + struct kvm_cpuid_entry2 *best; > > + > > + best = kvm_find_cpuid_entry(vcpu, 1, 0); > > + if (!best) > > + return; > > + > > + /* Update OSXSAVE bit */ > > + if (cpu_has_xsave&& best->function == 0x1) { > > + best->ecx&= ~(bit(X86_FEATURE_OSXSAVE)); > > + if (kvm_read_cr4(vcpu)& X86_CR4_OSXSAVE) > > + best->ecx |= bit(X86_FEATURE_OSXSAVE); > > + } > > +} > > Note: need to update after userspace writes cpuid as well. Not quite understand. Userspace set OSXSAVE should be trimmed IMO... > > > + > > > > int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) > > { > > > > unsigned long old_cr4 = kvm_read_cr4(vcpu); > > > > @@ -481,6 +513,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned > > long cr4) > > > > if (cr4& CR4_RESERVED_BITS) > > > > return 1; > > > > + if (!guest_cpuid_has_xsave(vcpu)&& (cr4& X86_CR4_OSXSAVE)) > > + return 1; > > + > > > > if (is_long_mode(vcpu)) { > > > > if (!(cr4& X86_CR4_PAE)) > > > > return 1; > > > > @@ -497,6 +532,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned > > long cr4) > > > > if ((cr4 ^ old_cr4)& pdptr_bits) > > > > kvm_mmu_reset_context(vcpu); > > > > + if ((cr4 ^ old_cr4)& X86_CR4_OSXSAVE) > > + update_cpuid(vcpu); > > + > > I think we need to reload the guest's xcr0 at this point. > Alternatively, call vmx_load_host_state() to ensure the the next entry > will reload it. Current xcr0 would be loaded when next vmentry. And if we use prepare_guest_switch(), how about SVM? > > > @@ -1931,7 +1964,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 > > *entry, u32 function, > > > > switch (function) { > > > > case 0: > > - entry->eax = min(entry->eax, (u32)0xb); > > + entry->eax = min(entry->eax, (u32)0xd); > > Do we need any special handling for leaf 0xc? Don't think so. CPUID would return all 0 for it. > > > @@ -4567,6 +4616,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > > > > kvm_x86_ops->prepare_guest_switch(vcpu); > > if (vcpu->fpu_active) > > > > kvm_load_guest_fpu(vcpu); > > > > + if (kvm_read_cr4(vcpu)& X86_CR4_OSXSAVE) > > + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); > > Better done in vmx_save_host_state(), so we only do it on context > switches or entries from userspace. > > kvm_read_cr4_bits() is faster - doesn't need a vmcs_readl(). > > > atomic_set(&vcpu->guest_mode, 1); > > smp_wmb(); > > > > @@ -5118,6 +5169,10 @@ void fx_init(struct kvm_vcpu *vcpu) > > > > fpu_alloc(&vcpu->arch.guest_fpu); > > fpu_finit(&vcpu->arch.guest_fpu); > > > > + /* Ensure guest xcr0 is valid for loading */ > > + if (cpu_has_xsave) > > + vcpu->arch.xcr0 = XSTATE_FP; > > + > > Can do it unconditionally, not that it matters much. > > > void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) > > { > > > > if (vcpu->guest_fpu_loaded) > > > > @@ -5134,6 +5197,10 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) > > > > vcpu->guest_fpu_loaded = 1; > > unlazy_fpu(current); > > > > + /* Restore all possible states in the guest */ > > + if (cpu_has_xsave&& guest_cpuid_has_xsave(vcpu)) > > + xsetbv(XCR_XFEATURE_ENABLED_MASK, > > + cpuid_get_possible_xcr0(vcpu)); > > Best to calculate it out of the fast path, when guest cpuid is set. > Need to check it at this time as well. You mean guest_cpuid_has_xsave()? Not quite understand the point here... > > Also can avoid it if guest xcr0 == host xcr0. I don't know the assumption that "host use all possible xcr0 bits" can apply. If so, only use host_xcr0 should be fine. Would update other points. Thanks. -- regards Yang, Sheng > > > fpu_restore_checking(&vcpu->arch.guest_fpu); > > trace_kvm_fpu(1); > > > > } > > > > @@ -5144,7 +5211,14 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) > > > > return; > > > > vcpu->guest_fpu_loaded = 0; > > > > + /* Save all possible states in the guest */ > > + if (cpu_has_xsave&& guest_cpuid_has_xsave(vcpu)) > > + xsetbv(XCR_XFEATURE_ENABLED_MASK, > > + cpuid_get_possible_xcr0(vcpu)); > > Ditto. > > > fpu_save_init(&vcpu->arch.guest_fpu); > > > > + if (cpu_has_xsave) > > + xsetbv(XCR_XFEATURE_ENABLED_MASK, > > + host_xcr0); > > > > ++vcpu->stat.fpu_reload; > > set_bit(KVM_REQ_DEACTIVATE_FPU,&vcpu->requests); > > trace_kvm_fpu(0); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html