On Thu, Feb 08, 2024, Paolo Bonzini wrote:
> On Thu, Feb 8, 2024 at 2:18 PM Wilczynski, Michal
> <michal.wilczynski@xxxxxxxxx> wrote:
> > Hi, I've tested the patch and it seems to work, both on Intel and AMD.
> > There was a problem with applying this chunk though:
> >
> > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> > index ac8b7614e79d..3d18fa7db353 100644
> > --- a/arch/x86/include/asm/kvm-x86-ops.h
> > +++ b/arch/x86/include/asm/kvm-x86-ops.h
> > @@ -119,7 +119,8 @@ KVM_X86_OP(setup_mce)
> >  #ifdef CONFIG_KVM_SMM
> >  KVM_X86_OP(smi_allowed)
> >  KVM_X86_OP()   // <- This shouldn't be there I guess ?
> > -KVM_X86_OP(leave_smm)
> > +KVM_X86_OP(leave_smm_prepare)
> > +KVM_X86_OP(leave_smm_commit)
> >  KVM_X86_OP(enable_smi_window)
> >  #endif
> >  KVM_X86_OP_OPTIONAL(dev_get_attr)
> >
> > Anyway I was a bit averse to this approach, as I noticed in the git log
> > that callbacks like e.g. post_leave_smm() used to exist, but they were
> > later removed, so I thought the maintainers don't like introducing extra
> > callbacks.
>
> If they are needed, it's fine. In my opinion a new callback is easier
> to handle and understand than new state.

Yeah, we ripped out post_leave_smm() because its sole usage at the time was
buggy, and having a callback without a purpose would just be dead code.

> > > 2) otherwise, if the problem is that we have not gone through the
> > > vmenter yet, then KVM needs to do that and _then_ inject the triple
> > > fault. The fix is to merge the .triple_fault and .check_nested_events
> > > callbacks, with something like the second attached patch - which
> > > probably has so many problems that I haven't even tried to compile it.
> >
> > Well, in this case if we know that RSM will fail it doesn't seem to me
> > like it makes sense to run vmenter just to kill the VM anyway, this
> > would be more confusing.
>
> Note that the triple fault must not kill the VM, it's just causing a
> nested vmexit from L2 to L1. KVM's algorithm to inject a
> vmexit-causing event is always to first ensure that the VMCS02 (VMCB02
> for AMD) is consistent, and only then trigger the vmexit. So if patch
> 2 or something like it works, that would be even better.
>
> > I've made the fix this way based on our discussion with Sean in v1, and
> > tried to mark the RSM instruction with a flag, as one that needs an
> > actual HW VMenter to complete successfully, and based on that
> > information manipulate nested_run_pending.

Heh, you misunderstood my suggestion.

 : But due to nested_run_pending being (unnecessarily) buried in vendor structs, it
 : might actually be easier to do a cleaner fix.  E.g. add yet another flag to track
 : that a hardware VM-Enter needs to be completed in order to complete instruction
 : emulation.

I didn't mean add a flag to the emulator to muck with nested_run_pending, I
meant add a flag to kvm_vcpu_arch to be a superset of nested_run_pending.

E.g. as a first step, something like the below.  And then as a follow-up, see
if it's doable to propagate nested_run_pending => insn_emulation_needs_vmenter
so that the nested_run_pending checks in {svm,vmx}_{interrupt,nmi,smi}_allowed()
can be dropped.
---
 arch/x86/include/asm/kvm_host.h |  8 ++++++
 arch/x86/kvm/smm.c              | 10 ++++++--
 arch/x86/kvm/x86.c              | 44 +++++++++++++++++++++++++--------
 3 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d271ba20a0b2..bb4250551619 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -769,6 +769,14 @@ struct kvm_vcpu_arch {
 	u64 ia32_misc_enable_msr;
 	u64 smbase;
 	u64 smi_count;
+
+	/*
+	 * Tracks if a successful VM-Enter is needed to complete emulation of
+	 * an instruction, e.g. to ensure emulation of RSM or nested VM-Enter,
+	 * which can directly inject events, completes before KVM attempts to
+	 * inject new events.
+	 */
+	bool insn_emulation_needs_vmenter;
 	bool at_instruction_boundary;
 	bool tpr_access_reporting;
 	bool xfd_no_write_intercept;
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index dc3d95fdca7d..c6e597b8c794 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -640,8 +640,14 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		return rsm_load_state_64(ctxt, &smram.smram64);
+		ret = rsm_load_state_64(ctxt, &smram.smram64);
 	else
 #endif
-		return rsm_load_state_32(ctxt, &smram.smram32);
+		ret = rsm_load_state_32(ctxt, &smram.smram32);
+
+	if (ret != X86EMUL_CONTINUE)
+		return ret;
+
+	vcpu->arch.insn_emulation_needs_vmenter = true;
+	return X86EMUL_CONTINUE;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bf10a9073a09..21a7183bbf69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10195,6 +10195,30 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
 	return kvm_x86_ops.nested_ops->check_events(vcpu);
 }
 
+static int kvm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+	if (vcpu->arch.insn_emulation_needs_vmenter)
+		return -EBUSY;
+
+	return static_call(kvm_x86_interrupt_allowed)(vcpu, for_injection);
+}
+
+static int kvm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+	if (vcpu->arch.insn_emulation_needs_vmenter)
+		return -EBUSY;
+
+	return static_call(kvm_x86_smi_allowed)(vcpu, for_injection);
+}
+
+static int kvm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+	if (vcpu->arch.insn_emulation_needs_vmenter)
+		return -EBUSY;
+
+	return static_call(kvm_x86_nmi_allowed)(vcpu, for_injection);
+}
+
 static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -10384,7 +10408,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 	 */
 #ifdef CONFIG_KVM_SMM
 	if (vcpu->arch.smi_pending) {
-		r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
+		r = can_inject ? kvm_smi_allowed(vcpu, true) : -EBUSY;
 		if (r < 0)
 			goto out;
 		if (r) {
@@ -10398,7 +10422,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 #endif
 
 	if (vcpu->arch.nmi_pending) {
-		r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
+		r = can_inject ? kvm_nmi_allowed(vcpu, true) : -EBUSY;
 		if (r < 0)
 			goto out;
 		if (r) {
@@ -10406,14 +10430,14 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 			vcpu->arch.nmi_injected = true;
 			static_call(kvm_x86_inject_nmi)(vcpu);
 			can_inject = false;
-			WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0);
+			WARN_ON_ONCE(kvm_nmi_allowed(vcpu, true) < 0);
 		}
 		if (vcpu->arch.nmi_pending)
 			static_call(kvm_x86_enable_nmi_window)(vcpu);
 	}
 
 	if (kvm_cpu_has_injectable_intr(vcpu)) {
-		r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
+		r = can_inject ? kvm_interrupt_allowed(vcpu, true) : -EBUSY;
 		if (r < 0)
 			goto out;
 		if (r) {
@@ -10422,7 +10446,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 			if (!WARN_ON_ONCE(irq == -1)) {
 				kvm_queue_interrupt(vcpu, irq, false);
 				static_call(kvm_x86_inject_irq)(vcpu, false);
-				WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+				WARN_ON(kvm_interrupt_allowed(vcpu, true) < 0);
 			}
 		}
 		if (kvm_cpu_has_injectable_intr(vcpu))
@@ -10969,6 +10993,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
 			     (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
 
+		vcpu->arch.insn_emulation_needs_vmenter = false;
+
 		exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
 		if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
 			break;
@@ -13051,14 +13077,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 		return true;
 
 	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
-	    (vcpu->arch.nmi_pending &&
-	     static_call(kvm_x86_nmi_allowed)(vcpu, false)))
+	    (vcpu->arch.nmi_pending && kvm_nmi_allowed(vcpu, false)))
 		return true;
 
 #ifdef CONFIG_KVM_SMM
 	if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
-	    (vcpu->arch.smi_pending &&
-	     static_call(kvm_x86_smi_allowed)(vcpu, false)))
+	    (vcpu->arch.smi_pending && kvm_smi_allowed(vcpu, false)))
 		return true;
 #endif
 
@@ -13136,7 +13160,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	return static_call(kvm_x86_interrupt_allowed)(vcpu, false);
+	return kvm_interrupt_allowed(vcpu, false);
 }
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)

base-commit: f8fe663bc413d2a14ab9a452638a99b975011a9d
--
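
As a rough sketch of the follow-up idea (hypothetical only, not part of the
diff above; the helper name and the VMX hook point are illustrative
assumptions), every path that currently sets nested_run_pending would also set
the new arch flag:

	/*
	 * Hypothetical helper: mark both the vendor-specific and the common
	 * "hardware VM-Enter still pending" state, so that the common
	 * kvm_{interrupt,nmi,smi}_allowed() wrappers alone are enough to
	 * block event injection until the VM-Enter completes.
	 */
	static void nested_vmx_mark_run_pending(struct kvm_vcpu *vcpu)
	{
		to_vmx(vcpu)->nested.nested_run_pending = 1;
		vcpu->arch.insn_emulation_needs_vmenter = true;
	}

With something like that in place, the explicit nested_run_pending checks in
vmx_nmi_allowed() and friends would be redundant with the -EBUSY returned by
the common wrappers, and could be dropped.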