From: Boris Ostrovsky <boris.ostrovsky@xxxxxxx> In some cases guests should not provide workarounds for errata even when the physical processor is affected. For example, because of erratum 400 on family 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before going to idle in order to avoid getting stuck in a non-C0 state. This is not necessary: HLT and IO instructions are intercepted and therefore there is no reason for erratum 400 workaround in the guest. This patch allows us to present a guest with certain errata as fixed, regardless of the state of actual hardware. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx> --- arch/x86/include/asm/kvm_host.h | 6 ++++ arch/x86/kvm/svm.c | 55 +++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 30 ++++++++++++++++++++- 3 files changed, 90 insertions(+), 1 deletions(-) Second version of OSVW patch. svm_init_osvw() is still in svm.c but registers' values are kept in virtual MSRs and should be available on reboot after cross-vendor migration. The registers can also be set from userland so that they are consistent across a cluster. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4973f4..9ef9215 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -452,6 +452,12 @@ struct kvm_vcpu_arch { u32 id; bool send_user_only; } apf; + + /* OSVW MSRs (AMD only) */ + struct { + u64 length; + u64 status; + } osvw; }; struct kvm_arch { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e32243e..b19769d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -110,6 +110,13 @@ struct nested_state { #define MSRPM_OFFSETS 16 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; +/* + * Set osvw_len to higher value when updated Revision Guides + * are published and we know what the new status bits are + */ +static uint64_t osvw_len = 4, osvw_status; +static DEFINE_SPINLOCK(svm_lock); + struct vcpu_svm { struct kvm_vcpu vcpu; struct vmcb *vmcb; @@ -556,6 +563,20 @@ static void svm_init_erratum_383(void) erratum_383_found = true; } +static void svm_init_osvw(struct kvm_vcpu *vcpu) +{ + /* + * Guests should see errata 400 and 415 as fixed (assuming that + * HLT and IO instructions are intercepted). + */ + vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; + vcpu->arch.osvw.status = osvw_status & ~(6ULL); + + /* Mark erratum 298 as present */ + if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) + vcpu->arch.osvw.status |= 1; +} + static int has_svm(void) { const char *msg; @@ -620,6 +641,38 @@ static int svm_hardware_enable(void *garbage) __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; } + + /* + * Get OSVW bits. + * + * Note that it is possible to have a system with mixed processor + * revisions and therefore different OSVW bits. If bits are not the same + * on different processors then choose the worst case (i.e. if erratum + * is present on one processor and not on another then assume that the + * erratum is present everywhere). + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { + uint64_t len, status; + int err; + + len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); + if (!err) + status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, + &err); + + spin_lock(&svm_lock); + if (err) + osvw_status = osvw_len = 0; + else { + if (len < osvw_len) + osvw_len = len; + osvw_status |= status; + osvw_status &= (1ULL << osvw_len) - 1; + } + spin_unlock(&svm_lock); + } else + osvw_status = osvw_len = 0; + svm_init_erratum_383(); return 0; @@ -1185,6 +1238,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) if (kvm_vcpu_is_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; + svm_init_osvw(&svm->vcpu); + return &svm->vcpu; free_page4: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4c938da..49a7b3c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -598,6 +598,14 @@ static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); } +static bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + return best && (best->ecx & bit(X86_FEATURE_OSVW)); +} + static void update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -1700,6 +1708,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) */ pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); break; + case MSR_AMD64_OSVW_ID_LENGTH: + if (!guest_cpuid_has_osvw(vcpu)) + return 1; + vcpu->arch.osvw.length = data; + break; + case MSR_AMD64_OSVW_STATUS: + if (!guest_cpuid_has_osvw(vcpu)) + return 1; + vcpu->arch.osvw.status = data; + break; default: if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); @@ -1978,6 +1996,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) */ data = 0xbe702111; break; + case MSR_AMD64_OSVW_ID_LENGTH: + if (!guest_cpuid_has_osvw(vcpu)) + return 1; + data = vcpu->arch.osvw.length; + break; + case MSR_AMD64_OSVW_STATUS: + if (!guest_cpuid_has_osvw(vcpu)) + return 1; + data = vcpu->arch.osvw.status; + break; default: if (!ignore_msrs) { pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); @@ -2451,7 +2479,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word6_x86_features = F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | - F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | + F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); /* cpuid 0xC0000001.edx */ -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html