[Bug 218267] [Sapphire Rapids][Upstream]Boot up multiple Windows VMs hang

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



https://bugzilla.kernel.org/show_bug.cgi?id=218267

--- Comment #8 from Sean Christopherson (seanjc@xxxxxxxxxx) ---
Thanks Chao!

Until the ucode update is available, I think we can work around the issue in KVM
by clearing VECTORING_INFO_VALID_MASK _immediately_ after exit, i.e. before
queueing the event for re-injection, if it should be impossible for the exit to
have occurred while vectoring.  I'm not sure I want to carry something like
this long-term since a ucode fix is imminent, but at the least it can hopefully
unblock end users.

The below uses a fairly conservative list of exits (a false positive could be
quite painful).  A slightly less conservative approach would be to also
include:

case EXIT_REASON_EXTERNAL_INTERRUPT:
case EXIT_REASON_TRIPLE_FAULT:
case EXIT_REASON_INIT_SIGNAL:
case EXIT_REASON_SIPI_SIGNAL:
case EXIT_REASON_INTERRUPT_WINDOW:
case EXIT_REASON_NMI_WINDOW:

as those exits should all be recognized only at instruction boundaries.

Compile tested only...

---
 arch/x86/kvm/vmx/vmx.c | 66 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 893366e53732..7240bd72b5f2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -147,6 +147,9 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 extern bool __read_mostly allow_smaller_maxphyaddr;
 module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);

+static bool __ro_after_init enable_spr141_erratum_workaround = true;
+module_param(enable_spr141_erratum_workaround, bool, S_IRUGO);
+
 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
 #define KVM_VM_CR0_ALWAYS_ON                           \
@@ -7163,8 +7166,67 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
        }
 }

+static bool is_vectoring_on_exit_impossible(struct vcpu_vmx *vmx)
+{
+       switch (vmx->exit_reason.basic) {
+       case EXIT_REASON_CPUID:
+       case EXIT_REASON_HLT:
+       case EXIT_REASON_INVD:
+       case EXIT_REASON_INVLPG:
+       case EXIT_REASON_RDPMC:
+       case EXIT_REASON_RDTSC:
+       case EXIT_REASON_VMCALL:
+       case EXIT_REASON_VMCLEAR:
+       case EXIT_REASON_VMLAUNCH:
+       case EXIT_REASON_VMPTRLD:
+       case EXIT_REASON_VMPTRST:
+       case EXIT_REASON_VMREAD:
+       case EXIT_REASON_VMRESUME:
+       case EXIT_REASON_VMWRITE:
+       case EXIT_REASON_VMOFF:
+       case EXIT_REASON_VMON:
+       case EXIT_REASON_CR_ACCESS:
+       case EXIT_REASON_DR_ACCESS:
+       case EXIT_REASON_IO_INSTRUCTION:
+       case EXIT_REASON_MSR_READ:
+       case EXIT_REASON_MSR_WRITE:
+       case EXIT_REASON_MSR_LOAD_FAIL:
+       case EXIT_REASON_MWAIT_INSTRUCTION:
+       case EXIT_REASON_MONITOR_TRAP_FLAG:
+       case EXIT_REASON_MONITOR_INSTRUCTION:
+       case EXIT_REASON_PAUSE_INSTRUCTION:
+       case EXIT_REASON_TPR_BELOW_THRESHOLD:
+       case EXIT_REASON_GDTR_IDTR:
+       case EXIT_REASON_LDTR_TR:
+       case EXIT_REASON_INVEPT:
+       case EXIT_REASON_RDTSCP:
+       case EXIT_REASON_PREEMPTION_TIMER:
+       case EXIT_REASON_INVVPID:
+       case EXIT_REASON_WBINVD:
+       case EXIT_REASON_XSETBV:
+       case EXIT_REASON_APIC_WRITE:
+       case EXIT_REASON_RDRAND:
+       case EXIT_REASON_INVPCID:
+       case EXIT_REASON_VMFUNC:
+       case EXIT_REASON_ENCLS:
+       case EXIT_REASON_RDSEED:
+       case EXIT_REASON_XSAVES:
+       case EXIT_REASON_XRSTORS:
+       case EXIT_REASON_UMWAIT:
+       case EXIT_REASON_TPAUSE:
+               return true;
+       }
+
+       return false;
+}
+
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
+       if ((vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+           enable_spr141_erratum_workaround &&
+           is_vectoring_on_exit_impossible(vmx))
+               vmx->idt_vectoring_info &= ~VECTORING_INFO_VALID_MASK;
+
        __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
                                  VM_EXIT_INSTRUCTION_LEN,
                                  IDT_VECTORING_ERROR_CODE);
@@ -8487,6 +8549,10 @@ __init int vmx_hardware_setup(void)
        if (!enable_apicv || !cpu_has_vmx_ipiv())
                enable_ipiv = false;

+       if (boot_cpu_data.x86_vfm != INTEL_SAPPHIRERAPIDS_X &&
+           boot_cpu_data.x86_vfm != INTEL_EMERALDRAPIDS_X)
+               enable_spr141_erratum_workaround = false;
+
        if (cpu_has_vmx_tsc_scaling())
                kvm_caps.has_tsc_control = true;


base-commit: 50e5669285fc2586c9f946c1d2601451d77cb49e
--

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux