On Tue, 2022-05-31 at 13:58 -0400, Paolo Bonzini wrote: > Commit 74fd41ed16fd ("KVM: x86: nSVM: support PAUSE filtering when L0 > doesn't intercept PAUSE") introduced passthrough support for nested > pause > filtering, (when the host doesn't intercept PAUSE) (either disabled > with > kvm module param, or disabled with '-overcommit cpu-pm=on') > > Before this commit, L1 KVM didn't intercept PAUSE at all; afterwards, > the feature was exposed as supported by KVM cpuid unconditionally, > thus > if L1 could try to use it even when the L0 KVM can't really support > it. > > In this case the fallback caused KVM to intercept each PAUSE > instruction; > in some cases, such intercept can slow down the nested guest so much > that it can fail to boot. Instead, before the problematic commit KVM > was already setting both thresholds to 0 in vmcb02, but after the > first > userspace VM exit shrink_ple_window was called and would reset the > pause_filter_count to the default value. > > To fix this, change the fallback strategy - ignore the guest > threshold > values, but use/update the host threshold values unless the guest > specifically requests disabling PAUSE filtering (either simple or > advanced). > > Also fix a minor bug: on nested VM exit, when PAUSE filter counter > were copied back to vmcb01, a dirty bit was not set. > > Thanks a lot to Suravee Suthikulpanit for debugging this! > > Fixes: 74fd41ed16fd ("KVM: x86: nSVM: support PAUSE filtering when L0 > doesn't intercept PAUSE") > Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> > Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> > Co-developed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> > Message-Id: <20220518072709.730031-1-mlevitsk@xxxxxxxxxx> > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/x86/kvm/svm/nested.c | 39 +++++++++++++++++++++---------------- > -- > arch/x86/kvm/svm/svm.c | 4 ++-- > 2 files changed, 23 insertions(+), 20 deletions(-) > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c > index 6d0233a2469e..88da8edbe1e1 100644 > --- a/arch/x86/kvm/svm/nested.c > +++ b/arch/x86/kvm/svm/nested.c > @@ -642,6 +642,8 @@ static void nested_vmcb02_prepare_control(struct > vcpu_svm *svm, > struct kvm_vcpu *vcpu = &svm->vcpu; > struct vmcb *vmcb01 = svm->vmcb01.ptr; > struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; > + u32 pause_count12; > + u32 pause_thresh12; > > /* > * Filled at exit: exit_code, exit_code_hi, exit_info_1, > exit_info_2, > @@ -721,27 +723,25 @@ static void > nested_vmcb02_prepare_control(struct vcpu_svm *svm, > if (!nested_vmcb_needs_vls_intercept(svm)) > vmcb02->control.virt_ext |= > VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; > > + pause_count12 = svm->pause_filter_enabled ? svm- > >nested.ctl.pause_filter_count : 0; > + pause_thresh12 = svm->pause_threshold_enabled ? svm- > >nested.ctl.pause_filter_thresh : 0; > if (kvm_pause_in_guest(svm->vcpu.kvm)) { > - /* use guest values since host doesn't use them */ > - vmcb02->control.pause_filter_count = > - svm->pause_filter_enabled ? > - svm->nested.ctl.pause_filter_count : > 0; > + /* use guest values since host doesn't intercept > PAUSE */ > + vmcb02->control.pause_filter_count = pause_count12; > + vmcb02->control.pause_filter_thresh = pause_thresh12; > > - vmcb02->control.pause_filter_thresh = > - svm->pause_threshold_enabled ? > - svm->nested.ctl.pause_filter_thresh : > 0; > - > - } else if (!vmcb12_is_intercept(&svm->nested.ctl, > INTERCEPT_PAUSE)) { > - /* use host values when guest doesn't use them */ > + } else { > + /* start from host values otherwise */ > vmcb02->control.pause_filter_count = vmcb01- > >control.pause_filter_count; > vmcb02->control.pause_filter_thresh = vmcb01- > >control.pause_filter_thresh; > - } else { > - /* > - * Intercept every PAUSE otherwise and > - * ignore both host and guest values > - */ > - vmcb02->control.pause_filter_count = 0; > - vmcb02->control.pause_filter_thresh = 0; > + > + /* ... but ensure filtering is disabled if so > requested. */ > + if (vmcb12_is_intercept(&svm->nested.ctl, > INTERCEPT_PAUSE)) { > + if (!pause_count12) > + vmcb02->control.pause_filter_count = > 0; > + if (!pause_thresh12) > + vmcb02->control.pause_filter_thresh = > 0; > + } > } > > nested_svm_transition_tlb_flush(vcpu); > @@ -1003,8 +1003,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm) > vmcb12->control.event_inj = svm- > >nested.ctl.event_inj; > vmcb12->control.event_inj_err = svm- > >nested.ctl.event_inj_err; > > - if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02- > >control.pause_filter_count) > + if (!kvm_pause_in_guest(vcpu->kvm)) { > vmcb01->control.pause_filter_count = vmcb02- > >control.pause_filter_count; > + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); > + > + } > > nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm- > >vmcb01.ptr); > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 1bd42e7dfa36..4aea82f668fb 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -956,7 +956,7 @@ static void grow_ple_window(struct kvm_vcpu > *vcpu) > struct vmcb_control_area *control = &svm->vmcb->control; > int old = control->pause_filter_count; > > - if (kvm_pause_in_guest(vcpu->kvm) || !old) > + if (kvm_pause_in_guest(vcpu->kvm)) > return; > > control->pause_filter_count = __grow_ple_window(old, > @@ -977,7 +977,7 @@ static void shrink_ple_window(struct kvm_vcpu > *vcpu) > struct vmcb_control_area *control = &svm->vmcb->control; > int old = control->pause_filter_count; > > - if (kvm_pause_in_guest(vcpu->kvm) || !old) > + if (kvm_pause_in_guest(vcpu->kvm)) > return; > > control->pause_filter_count = Thank you Paolo! Best regards, Maxim Levitsky