On Thu, Jan 07, 2021, Maxim Levitsky wrote:
> It is possible to exit nested guest mode, entered by
> svm_set_nested_state, prior to the first VM entry to it (e.g. due to a
> pending event), if the nested run was not pending during the migration.

Ugh, I assume this is due to one of the "premature" nested_ops->check_events()
calls that are necessitated by the event mess?  I'm guessing kvm_vcpu_running()
is the culprit?  If my assumption is correct, this bug affects nVMX as well.
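To make the failing sequence concrete, here's a rough userspace model of the
race (illustrative only, NOT kernel code; the struct and helpers below are
simplified stand-ins for their KVM namesakes, and "patched" selects between
the current behavior and clearing the request on nested VM-Exit):

/*
 * Toy model of the dangling KVM_REQ_GET_NESTED_STATE_PAGES request: a plain
 * bitmask stands in for vcpu->requests.  Build with: cc -o toy toy.c
 */
#include <stdbool.h>
#include <stdio.h>

#define KVM_REQ_GET_NESTED_STATE_PAGES	0

struct toy_vcpu {
	unsigned long requests;
	bool guest_mode;		/* stand-in for is_guest_mode() */
};

static void kvm_make_request(int req, struct toy_vcpu *vcpu)
{
	vcpu->requests |= 1ul << req;
}

static bool kvm_check_request(int req, struct toy_vcpu *vcpu)
{
	if (!(vcpu->requests & (1ul << req)))
		return false;
	vcpu->requests &= ~(1ul << req);
	return true;
}

static void kvm_clear_request(int req, struct toy_vcpu *vcpu)
{
	vcpu->requests &= ~(1ul << req);
}

int main(void)
{
	struct toy_vcpu vcpu = { .guest_mode = true };
	bool patched = true;	/* flip to false for the current behavior */

	/* KVM_SET_NESTED_STATE: in guest mode, defer loading the L2 pages. */
	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, &vcpu);

	/*
	 * kvm_vcpu_running() -> check_events() synthesizes a nested VM-Exit
	 * before vcpu_enter_guest() ever sees the pending request.
	 */
	vcpu.guest_mode = false;
	if (patched)
		kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &vcpu);

	/* vcpu_enter_guest(): a stale request would fire while running L1. */
	if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, &vcpu) &&
	    !vcpu.guest_mode)
		printf("BUG: would load the L2 MSR permission bitmap in L1\n");
	else
		printf("ok: no stale request\n");

	return 0;
}

With patched = false, the toy prints the BUG line, i.e. the stale request
would cause nested_svm_vmrun_msrpm() to switch to the nested MSR permission
bitmap while the vCPU is actually running L1.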
Rather than clear the request blindly on any nested VM-Exit, what about
something like the following?  IMO, KVM really shouldn't be synthesizing a
nested VM-Exit before it processes pending requests, but unfortunately the
nested_ops->check_events() mess isn't easily fixed.  This will at least limit
the mess, e.g. with this we'd get a WARN if KVM_REQ_GET_NESTED_STATE_PAGES is
set after some other VM-Exit.

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3136e05831cf..f44e6f7a0c9b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2857,17 +2857,15 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 	if (!pe)
 		return;
 
-	if (is_guest_mode(vcpu)) {
-		r = kvm_x86_ops.nested_ops->check_events(vcpu);
-		if (r < 0)
-			return;
-		/*
-		 * If an event has happened and caused a vmexit,
-		 * we know INITs are latched and therefore
-		 * we will not incorrectly deliver an APIC
-		 * event instead of a vmexit.
-		 */
-	}
+	r = kvm_nested_check_events(vcpu);
+	if (r < 0)
+		return;
+
+	/*
+	 * If an event has happened and caused a vmexit, we know INITs are
+	 * latched and therefore we will not incorrectly deliver an APIC
+	 * event instead of a vmexit.
+	 */
 
 	/*
 	 * INITs are latched while CPU is in specific states
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f7c1fc7a3ce..b0f172d13cab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8223,6 +8223,25 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
+int kvm_nested_check_events(struct kvm_vcpu *vcpu)
+{
+	int r;
+
+	if (!is_guest_mode(vcpu))
+		return 0;
+
+	r = kvm_x86_ops.nested_ops->check_events(vcpu);
+
+	/*
+	 * Clear nested-specific requests if checking nested events triggered a
+	 * VM-Exit; they'll be re-requested on nested VM-Enter (if necessary).
+	 */
+	if (!is_guest_mode(vcpu))
+		kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
+	return r;
+}
+
 static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
 {
 	int r;
@@ -8267,11 +8286,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 	 * from L2 to L1 due to pending L1 events which require exit
 	 * from L2 to L1.
 	 */
-	if (is_guest_mode(vcpu)) {
-		r = kvm_x86_ops.nested_ops->check_events(vcpu);
-		if (r < 0)
-			goto busy;
-	}
+	r = kvm_nested_check_events(vcpu);
+	if (r < 0)
+		goto busy;
 
 	/* try to inject new event if pending */
 	if (vcpu->arch.exception.pending) {
@@ -8789,7 +8806,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	if (kvm_request_pending(vcpu)) {
 		if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
-			if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
+			if (!WARN_ON(!is_guest_mode(vcpu)) &&
+			    unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
 				r = 0;
 				goto out;
 			}
@@ -9111,8 +9129,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 
 static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu))
-		kvm_x86_ops.nested_ops->check_events(vcpu);
+	(void)kvm_nested_check_events(vcpu);
 
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c5ee0f5ce0f1..dce61fda9c5e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -247,6 +247,8 @@ static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
 	return is_smm(vcpu) || kvm_x86_ops.apic_init_signal_blocked(vcpu);
 }
 
+int kvm_nested_check_events(struct kvm_vcpu *vcpu);
+
 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);

> In this case we must not switch to the nested MSR permission bitmap.
> Also add a warning to catch similar cases in the future.
> 
> Fixes: a7d5c7ce41ac1 ("KVM: nSVM: delay MSR permission processing to first nested VM run")
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> ---
>  arch/x86/kvm/svm/nested.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index b0b667456b2e7..ee4f2082ad1bd 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -199,6 +199,10 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
>  static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	if (WARN_ON_ONCE(!is_guest_mode(&svm->vcpu)))
> +		return false;
> +
>  	if (!nested_svm_vmrun_msrpm(svm)) {
>  		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>  		vcpu->run->internal.suberror =
> @@ -595,6 +599,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>  	svm->nested.vmcb12_gpa = 0;
>  	WARN_ON_ONCE(svm->nested.nested_run_pending);
> 
> +	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
> +
>  	/* in case we halted in L2 */
>  	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
> 
> --
> 2.26.2
>