On Thu, Sep 28, 2023, Maxim Levitsky wrote:
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 4b74ea91f4e6bb6..28bb0e6b321660d 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -62,6 +62,9 @@ static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
>  static bool force_avic;
>  module_param_unsafe(force_avic, bool, 0444);
>  
> +static int avic_zen2_errata_workaround = -1;
> +module_param(avic_zen2_errata_workaround, int, 0444);
> +
>  /* Note:
>   * This hash table is used to map VM_ID to a struct kvm_svm,
>   * when handling AMD IOMMU GALOG notification to schedule in
> @@ -276,7 +279,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
>  
>  static int avic_init_backing_page(struct kvm_vcpu *vcpu)
>  {
> -	u64 *entry, new_entry;
> +	u64 *entry;
>  	int id = vcpu->vcpu_id;
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> @@ -308,10 +311,10 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
>  	if (!entry)
>  		return -EINVAL;
>  
> -	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
> -			       AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
> -			       AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
> -	WRITE_ONCE(*entry, new_entry);
> +	svm->avic_physical_id_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
> +						 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
> +						 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
> +	WRITE_ONCE(*entry, svm->avic_physical_id_entry);

Aha!  Rather than deal with the dummy entry at runtime, simply point the pointer
at the dummy entry during setup.

And instead of adding a dedicated erratum param, let's piggyback VMX's
enable_ipiv.  It's not a true disable, but IMO it's close enough.  That will
make the param much more self-documenting, and won't feel so awkward if someone
wants to disable IPI virtualization for other reasons.

Then we can do this in three steps:

  1. Move enable_ipiv to common code
  2. Let userspace disable enable_ipiv for SVM+AVIC
  3. Disable enable_ipiv for affected CPUs

The biggest downside to using enable_ipiv is that the "auto" behavior for the
erratum will be a bit ugly, but that's a solvable problem.

If you've no objection to the above approach, I'll post the attached patches
along with a massaged version of this patch.  The attached patches apply on top
of an AVIC cleanup[*], which (shameless plug) could use a review ;-)

[*] https://lore.kernel.org/all/20230815213533.548732-1-seanjc@xxxxxxxxxx
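
To illustrate why pointing the pointer at a dummy entry during setup keeps the
runtime path branch-free, here is a simplified sketch of the vCPU "mark running"
update with patch 2 below applied.  This is not the actual avic_vcpu_load(); the
helper name is made up for illustration:

/*
 * Simplified sketch, not the real avic_vcpu_load(): the update is identical
 * whether IPI virtualization is enabled or not, because the pointer targets
 * either the real physical ID table slot or the per-vCPU scratch entry.
 */
static void avic_mark_vcpu_running(struct vcpu_svm *svm, int h_physical_id)
{
	u64 entry = READ_ONCE(*svm->avic_physical_id_entry);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

	/*
	 * With enable_ipiv=0 this lands in svm->ipiv_disabled_backing_entry,
	 * so hardware never sees IsRunning set and unicast IPIs keep exiting.
	 */
	WRITE_ONCE(*svm->avic_physical_id_entry, entry);
}

No enable_ipiv check anywhere in the hot path, which is the point.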
>From 4990d0e56b1e9bb8bf97502d525779b2a43d26d4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@xxxxxxxxxx>
Date: Thu, 28 Sep 2023 17:22:52 -0700
Subject: [PATCH 1/2] KVM: VMX: Move enable_ipiv knob to common x86

Move enable_ipiv to common x86 so that it can be reused by SVM to control
IPI virtualization when AVIC is enabled.  SVM doesn't actually provide a
way to truly disable IPI virtualization, but KVM can get close enough by
skipping the necessary table programming.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/vmx/capabilities.h | 1 -
 arch/x86/kvm/vmx/vmx.c          | 2 --
 arch/x86/kvm/x86.c              | 3 +++
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e9e69009789e..7239155213c7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1806,6 +1806,7 @@ extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_ipiv;
 extern struct kvm_x86_ops kvm_x86_ops;
 
 #define KVM_X86_OP(func)	\
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 41a4533f9989..8cbfef64ea75 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -15,7 +15,6 @@ extern bool __read_mostly enable_ept;
 extern bool __read_mostly enable_unrestricted_guest;
 extern bool __read_mostly enable_ept_ad_bits;
 extern bool __read_mostly enable_pml;
-extern bool __read_mostly enable_ipiv;
 extern int __read_mostly pt_mode;
 
 #define PT_MODE_SYSTEM		0
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 72e3943f3693..f51dac6b21ae 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -104,8 +104,6 @@ static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
 module_param(enable_apicv, bool, S_IRUGO);
-
-bool __read_mostly enable_ipiv = true;
 module_param(enable_ipiv, bool, 0444);
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6573c89c35a9..ccf5aa4fbe73 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -235,6 +235,9 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 bool __read_mostly enable_apicv = true;
 EXPORT_SYMBOL_GPL(enable_apicv);
 
+bool __read_mostly enable_ipiv = true;
+EXPORT_SYMBOL_GPL(enable_ipiv);
+
 u64 __read_mostly host_xss;
 EXPORT_SYMBOL_GPL(host_xss);
 

base-commit: ca3beed3b49348748201a2a35888b49858ce5d73
-- 
2.42.0.582.g8ccd20d70d-goog
>From fb86a56d11eac07626ffd9defeff39b88dbf6406 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@xxxxxxxxxx>
Date: Thu, 28 Sep 2023 17:25:48 -0700
Subject: [PATCH 2/2] KVM: SVM: Add enable_ipiv param, skip physical ID
 programming if disabled

Let userspace "disable" IPI virtualization via an enable_ipiv module param
by programming a dummy entry instead of the vCPU's actual backing entry in
the physical ID table.  SVM doesn't provide a way to actually disable IPI
virtualization in hardware, but by leaving all entries blank, every IPI in
the guest (except for self-IPIs) will generate a VM-Exit.

Providing a way to effectively disable IPI virtualization will allow KVM
to safely enable AVIC on hardware that is susceptible to erratum #1235,
which causes hardware to sometimes fail to detect that the IsRunning bit
has been cleared by software.

All credit goes to Maxim for the idea!

Suggested-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/kvm/svm/avic.c | 15 ++++++++++++++-
 arch/x86/kvm/svm/svm.c  |  3 +++
 arch/x86/kvm/svm/svm.h  |  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index fa87b6853f1d..fc804bb84394 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -310,7 +310,20 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 		    AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
 	WRITE_ONCE(table[id], new_entry);
 
-	svm->avic_physical_id_entry = &table[id];
+	/*
+	 * IPI virtualization is bundled with AVIC, but effectively can be
+	 * disabled simply by never marking vCPUs as running in the physical ID
+	 * table.  Use a dummy entry to avoid conditionals in the runtime code,
+	 * and to keep the IOMMU coordination logic as simple as possible.  The
+	 * entry in the table also needs to be valid (see above), otherwise KVM
+	 * will ignore IPIs due to thinking the target doesn't exist.
+	 */
+	if (enable_ipiv) {
+		svm->avic_physical_id_entry = &table[id];
+	} else {
+		svm->ipiv_disabled_backing_entry = table[id];
+		svm->avic_physical_id_entry = &svm->ipiv_disabled_backing_entry;
+	}
 
 	return 0;
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index acdd0b89e471..bc40ffb5c47c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -227,6 +227,8 @@ module_param(tsc_scaling, int, 0444);
 static bool avic;
 module_param(avic, bool, 0444);
 
+module_param(enable_ipiv, bool, 0444);
+
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
@@ -5252,6 +5254,7 @@ static __init int svm_hardware_setup(void)
 	enable_apicv = avic = avic && avic_hardware_setup();
 
 	if (!enable_apicv) {
+		enable_ipiv = false;
 		svm_x86_ops.vcpu_blocking = NULL;
 		svm_x86_ops.vcpu_unblocking = NULL;
 		svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 147516617f88..7a1fc9325d74 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -264,6 +264,7 @@ struct vcpu_svm {
 	u32 ldr_reg;
 	u32 dfr_reg;
 
+	u64 ipiv_disabled_backing_entry;
 	u64 *avic_physical_id_entry;
 
 	/*
-- 
2.42.0.582.g8ccd20d70d-goog
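
As for step #3 and the "auto" behavior, a rough sketch of what disabling
enable_ipiv for affected parts could look like on top of the two patches above.
The family/model check for erratum #1235 and the helper names are assumptions
for illustration, not taken from the erratum text or this series; the snippet
would live in avic.c and be called from avic_hardware_setup():

/*
 * Rough sketch only: the Zen2 model range below and the helper names are
 * assumptions for illustration, not lifted from the erratum or this series.
 */
static bool avic_has_erratum_1235(void)
{
	/* Erratum #1235 is reported against Zen2 (family 0x17) parts. */
	return boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	       boot_cpu_data.x86 == 0x17 &&
	       boot_cpu_data.x86_model >= 0x30;
}

/* Called from avic_hardware_setup(), before committing to enable_ipiv. */
static void avic_handle_erratum_1235(void)
{
	if (!enable_ipiv || !avic_has_erratum_1235())
		return;

	pr_warn("Disabling IPI virtualization due to erratum #1235\n");
	enable_ipiv = false;
}

The ugly part mentioned above is distinguishing "userspace explicitly asked for
enable_ipiv=1" from the default; the sketch sidesteps that and only shows the
detection side.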