When the host enables XSAVE/XRSTOR, the patch exposes the XSAVE/XRSTOR-related CPUID leaves to the guest by fixing up kvm_emulate_cpuid(), and it allows the guest to set CR4.OSXSAVE to enable XSAVE. The patch adds a per-vcpu host/guest xstate image/mask and enhances the current FXSAVE/FXRSTOR with the new XSAVE/XRSTOR on the host xstate (FPU/SSE/YMM) switch. Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 15 +-- arch/x86/include/asm/vmx.h | 1 + arch/x86/include/asm/xsave.h | 3 + arch/x86/kvm/vmx.c | 24 +++++ arch/x86/kvm/x86.c | 217 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 242 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3f0007b..60be1a7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -303,6 +303,11 @@ struct kvm_vcpu_arch { struct i387_fxsave_struct host_fx_image; struct i387_fxsave_struct guest_fx_image; + struct xsave_struct *host_xstate_image; + struct xsave_struct *guest_xstate_image; + uint64_t host_xstate_mask; + uint64_t guest_xstate_mask; + gva_t mmio_fault_cr2; struct kvm_pio_request pio; void *pio_data; @@ -718,16 +723,6 @@ static inline unsigned long read_msr(unsigned long msr) } #endif -static inline void kvm_fx_save(struct i387_fxsave_struct *image) -{ - asm("fxsave (%0)":: "r" (image)); -} - -static inline void kvm_fx_restore(struct i387_fxsave_struct *image) -{ - asm("fxrstor (%0)":: "r" (image)); -} - static inline void kvm_fx_finit(void) { asm("finit"); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fb9a080..842286b 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -260,6 +260,7 @@ enum vmcs_field { #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 /* * Interruption-information format diff --git a/arch/x86/include/asm/xsave.h 
b/arch/x86/include/asm/xsave.h index ddc04cc..ada81a2 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -13,6 +13,9 @@ #define FXSAVE_SIZE 512 +#define XSTATE_YMM_SIZE 256 +#define XSTATE_YMM_OFFSET (512 + 64) + /* * These are the features that the OS can handle currently. */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 875b785..a72d024 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -35,6 +35,8 @@ #include <asm/vmx.h> #include <asm/virtext.h> #include <asm/mce.h> +#include <asm/i387.h> +#include <asm/xcr.h> #include "trace.h" @@ -2517,6 +2519,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; if (enable_ept) vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; + if (cpu_has_xsave) + vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE; vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; @@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu) return 1; } +static int handle_xsetbv(struct kvm_vcpu *vcpu) +{ + u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | + (u32)kvm_register_read(vcpu, VCPU_REGS_RAX); + u64 host_bv = vcpu->arch.host_xstate_mask; + + if (((new_bv ^ host_bv) & ~host_bv) || !(new_bv & 1)) + goto err; + if ((host_bv & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE)) + goto err; + vcpu->arch.guest_xstate_mask = new_bv; + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask); + skip_emulated_instruction(vcpu); + return 1; +err: + kvm_inject_gp(vcpu, 0); + return 1; +} + static int handle_apic_access(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; @@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, + [EXIT_REASON_XSETBV] = handle_xsetbv, 
[EXIT_REASON_TASK_SWITCH] = handle_task_switch, [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6b2ce1d..2af3fbe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -52,6 +52,8 @@ #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/mce.h> +#include <asm/i387.h> +#include <asm/xcr.h> #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -62,6 +64,7 @@ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0) \ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -4017,6 +4020,36 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); +static struct kmem_cache *kvm_xstate_cachep; +static unsigned int kvm_xstate_size; + +static int kvm_alloc_xstate_cachep(void) +{ + u32 eax, ebx, ecx, edx; + + if (!cpu_has_xsave) + return 0; + + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + kvm_xstate_size = ebx; + kvm_xstate_cachep = + kmem_cache_create("kvm_vcpu_xstate", kvm_xstate_size, + __alignof__(union thread_xstate), 0, NULL); + if (!kvm_xstate_cachep) + return -ENOMEM; + + return 0; +} + +static void kvm_free_xstate_cachep(void) +{ + if (!kvm_xstate_cachep) + return; + + kmem_cache_destroy(kvm_xstate_cachep); + kvm_xstate_cachep = NULL; +} + int kvm_arch_init(void *opaque) { int r; @@ -4039,6 +4072,10 @@ int kvm_arch_init(void *opaque) goto out; } + r = kvm_alloc_xstate_cachep(); + if (r) + goto out; + r = kvm_mmu_module_init(); if (r) goto out; @@ -4058,6 +4095,7 @@ int kvm_arch_init(void *opaque) return 0; out: + kvm_free_xstate_cachep(); return r; } @@ -4070,6 +4108,7 @@ void kvm_arch_exit(void) CPUFREQ_TRANSITION_NOTIFIER); kvm_x86_ops = NULL; kvm_mmu_module_exit(); + kvm_free_xstate_cachep(); } int 
kvm_emulate_halt(struct kvm_vcpu *vcpu) @@ -4307,6 +4346,65 @@ not_found: return 36; } +#define bitmaskof(idx) (1U << ((idx) & 31)) +static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx) +{ + u32 eax, ebx, ecx, edx; + + if (func != 0 && func != 1 && func != 0xd) + return; + + eax = kvm_register_read(vcpu, VCPU_REGS_RAX); + ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); + ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); + edx = kvm_register_read(vcpu, VCPU_REGS_RDX); + + switch (func) { + case 0: + /* fixup the Maximum Input Value */ + if (cpu_has_xsave && eax < 0xd) + eax = 0xd; + break; + case 1: + ecx &= ~(bitmaskof(X86_FEATURE_XSAVE) | + bitmaskof(X86_FEATURE_OSXSAVE)); + if (!cpu_has_xsave) + break; + ecx |= bitmaskof(X86_FEATURE_XSAVE); + if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE) + ecx |= bitmaskof(X86_FEATURE_OSXSAVE); + break; + case 0xd: + eax = ebx = ecx = edx = 0; + if (!cpu_has_xsave) + break; + switch (idx) { + case 0: + eax = vcpu->arch.host_xstate_mask & XCNTXT_MASK; + /* FP/SSE + XSAVE.HEADER + YMM. 
*/ + ecx = 512 + 64; + if (eax & XSTATE_YMM) + ecx += XSTATE_YMM_SIZE; + ebx = ecx; + break; + case 2: + if (!(vcpu->arch.host_xstate_mask & XSTATE_YMM)) + break; + eax = XSTATE_YMM_SIZE; + ebx = XSTATE_YMM_OFFSET; + break; + default: + break; + } + break; + } + + kvm_register_write(vcpu, VCPU_REGS_RAX, eax); + kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); + kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); + kvm_register_write(vcpu, VCPU_REGS_RDX, edx); +} + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 function, index; @@ -4325,6 +4423,9 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); } + + kvm_emulate_cpuid_fixup(vcpu, function, index); + kvm_x86_ops->skip_emulated_instruction(vcpu); trace_kvm_cpuid(function, kvm_register_read(vcpu, VCPU_REGS_RAX), @@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu) +{ + if (cpu_has_xsave) { + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27" + : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image) + : "memory"); + vcpu->arch.host_xstate_mask = + xgetbv(XCR_XFEATURE_ENABLED_MASK); + } else { + asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image)); + } +} + +static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu) +{ + if (cpu_has_xsave) { + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27" + : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image) + : "memory"); + vcpu->arch.guest_xstate_mask = + xgetbv(XCR_XFEATURE_ENABLED_MASK); + } else { + asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image)); + } +} + +static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu) +{ + if (cpu_has_xsave) { + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask); + asm volatile (".byte " REX_PREFIX 
"0x0f,0xae,0x2f" + : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)); + } else { + asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image)); + } +} + +static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu) +{ + if (cpu_has_xsave) { + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask); + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f" + : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)); + } else { + asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image)); + } +} + void fx_init(struct kvm_vcpu *vcpu) { unsigned after_mxcsr_mask; @@ -5102,17 +5257,21 @@ void fx_init(struct kvm_vcpu *vcpu) * allocate ram with GFP_KERNEL. */ if (!used_math()) - kvm_fx_save(&vcpu->arch.host_fx_image); + kvm_fx_save_host(vcpu); /* Initialize guest FPU by resetting ours and saving into guest's */ preempt_disable(); - kvm_fx_save(&vcpu->arch.host_fx_image); + kvm_fx_save_host(vcpu); kvm_fx_finit(); - kvm_fx_save(&vcpu->arch.guest_fx_image); - kvm_fx_restore(&vcpu->arch.host_fx_image); + kvm_fx_save_guest(vcpu); + kvm_fx_restore_host(vcpu); preempt_enable(); vcpu->arch.cr0 |= X86_CR0_ET; + + if (cpu_has_xsave) + return; + after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); vcpu->arch.guest_fx_image.mxcsr = 0x1f80; memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, @@ -5126,8 +5285,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 1; - kvm_fx_save(&vcpu->arch.host_fx_image); - kvm_fx_restore(&vcpu->arch.guest_fx_image); + kvm_fx_save_host(vcpu); + kvm_fx_restore_guest(vcpu); trace_kvm_fpu(1); } @@ -5137,13 +5296,50 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; - kvm_fx_save(&vcpu->arch.guest_fx_image); - kvm_fx_restore(&vcpu->arch.host_fx_image); + kvm_fx_save_guest(vcpu); + kvm_fx_restore_host(vcpu); ++vcpu->stat.fpu_reload; set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); trace_kvm_fpu(0); } +static void kvm_arch_vcpu_destroy_xstate_image(struct 
kvm_vcpu *vcpu) +{ + if (vcpu->arch.guest_xstate_image) + kmem_cache_free(kvm_xstate_cachep, + vcpu->arch.guest_xstate_image); + if (vcpu->arch.host_xstate_image) + kmem_cache_free(kvm_xstate_cachep, + vcpu->arch.host_xstate_image); + vcpu->arch.guest_xstate_image = NULL; + vcpu->arch.host_xstate_image = NULL; +} + +static int kvm_arch_vcpu_create_xstate_image(struct kvm_vcpu *vcpu) +{ + if (!cpu_has_xsave) + return 0; + + if (!vcpu->arch.guest_xstate_image) { + vcpu->arch.guest_xstate_image = + kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL); + if (!vcpu->arch.guest_xstate_image) + goto err; + } + if (!vcpu->arch.host_xstate_image) { + vcpu->arch.host_xstate_image = + kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL); + if (!vcpu->arch.host_xstate_image) + goto err; + } + + return 0; + +err: + kvm_arch_vcpu_destroy_xstate_image(vcpu); + return -ENOMEM; +} + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { if (vcpu->arch.time_page) { @@ -5152,6 +5348,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) } kvm_x86_ops->vcpu_free(vcpu); + kvm_arch_vcpu_destroy_xstate_image(vcpu); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, @@ -5189,6 +5386,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) vcpu_put(vcpu); kvm_x86_ops->vcpu_free(vcpu); + kvm_arch_vcpu_destroy_xstate_image(vcpu); } int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) @@ -5201,6 +5399,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.dr6 = DR6_FIXED_1; vcpu->arch.dr7 = DR7_FIXED_1; + if (kvm_arch_vcpu_create_xstate_image(vcpu) < 0) + return -ENOMEM; + return kvm_x86_ops->vcpu_reset(vcpu); } -- 1.6.0 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html