On Wed, Feb 28, 2024, Vitaly Kuznetsov wrote: > @@ -273,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e > int nent) > { > struct kvm_cpuid_entry2 *best; > + struct kvm_hypervisor_cpuid kvm_cpuid; > > best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); > if (best) { > @@ -299,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e > cpuid_entry_has(best, X86_FEATURE_XSAVEC))) > best->ebx = xstate_required_size(vcpu->arch.xcr0, true); > > - best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent); > - if (kvm_hlt_in_guest(vcpu->kvm) && best && > - (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) > - best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); > + kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE); > + if (kvm_cpuid.base) { > + best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base); > + if (kvm_hlt_in_guest(vcpu->kvm) && best) > + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); > + } > > if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { > best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); Not now, as we need a minimal fix, but we need to fix the root problem; this is way too brittle. Multiple helpers take @vcpu, including __kvm_update_cpuid_runtime(), before the incoming CPUID is set. That's just asking for new bugs to crop up. Am I missing something, or can we just swap() the new and old, update the new in the context of the vCPU, and then undo the swap() if there's an issue? vcpu->mutex is held, and accessing this state from a different task is wildly unsafe, so I don't see any problem with temporarily having an in-flux state. If we want to be paranoid, we can probably get away with killing the VM if the vCPU has run and the incoming CPUID is "bad", e.g. to guard against something in kvm_set_cpuid() consuming soon-to-be-stale state.
And that's actually a feature of sorts, because _if_ something in kvm_set_cpuid() consumes the vCPU's CPUID, then we have a bug _now_ that affects the happy path. Completely untested (I haven't updated the myriad helpers), but this would allow us to revert/remove all of the changes that allow peeking at a CPUID array that lives outside of the vCPU. static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, int nent) { int r, i; swap(vcpu->arch.cpuid_entries, e2); swap(vcpu->arch.cpuid_nent, nent); #ifdef CONFIG_KVM_HYPERV if (kvm_cpuid_has_hyperv(vcpu)) { r = kvm_hv_vcpu_init(vcpu); if (r) goto err; } #endif r = kvm_check_cpuid(vcpu); if (r) goto err; kvm_update_cpuid_runtime(vcpu); /* * KVM does not correctly handle changing guest CPUID after KVM_RUN, as * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with * the core vCPU model on the fly. It would've been better to forbid any * KVM_SET_CPUID{,2} calls after KVM_RUN altogether but unfortunately * some VMMs (e.g. QEMU) reuse vCPU fds for CPU hotplug/unplug and do * KVM_SET_CPUID{,2} again. To support this legacy behavior, check * whether the supplied CPUID data is equal to what's already set. */ if (kvm_vcpu_has_run(vcpu)) { r = kvm_cpuid_check_equal(vcpu, e2, nent); if (r) goto err; } vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE); #ifdef CONFIG_KVM_XEN vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE); #endif kvm_vcpu_after_set_cpuid(vcpu); kvfree(e2); return 0; err: swap(vcpu->arch.cpuid_entries, e2); swap(vcpu->arch.cpuid_nent, nent); return r; }