Sean,
This patch inhibits APICv/AVIC for a VM running in x2APIC mode even on a system w/ x2AVIC support.
On 9/2/2022 7:22 PM, Sean Christopherson wrote:
Reintroduce APICV_INHIBIT_REASON_X2APIC as a "partial" inhibit for AMD
to fix a bug where the APIC access page is visible to vCPUs that have
x2APIC enabled, i.e. shouldn't be able to "see" the xAPIC MMIO region.
On AMD, due to its "hybrid" mode where AVIC is enabled when x2APIC is
enabled even without x2AVIC support, the bug occurs any time AVIC is
enabled as x2APIC is fully emulated by KVM. I.e. hardware isn't aware
that the guest is operating in x2APIC mode.
Opportunistically drop the "can" while updating avic_activate_vmcb()'s
comment, i.e. to state that KVM _does_ support the hybrid mode. Move
the "Note:" down a line to conform to preferred kernel/KVM multi-line
comment style.
Leave Intel as-is for now to avoid a subtle performance regression, even
though Intel likely suffers from the same bug. On Intel, in theory the
bug rears its head only when vCPUs share host page tables (extremely
likely) and x2APIC enabling is not consistent within the guest, i.e. if
some vCPUs have x2APIC enabled and others do not (unlikely to occur
except in certain situations, e.g. bringing up APs).
Fixes: 0e311d33bfbe ("KVM: SVM: Introduce hybrid-AVIC mode")
Cc: stable@xxxxxxxxxxxxxxx
Suggested-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 10 ++++++++++
arch/x86/kvm/lapic.c | 4 +++-
arch/x86/kvm/mmu/mmu.c | 2 +-
arch/x86/kvm/svm/avic.c | 15 +++++++-------
arch/x86/kvm/x86.c | 35 +++++++++++++++++++++++++++++----
5 files changed, 53 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2c96c43c313a..1fd1b66ceeb6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1132,6 +1132,15 @@ enum kvm_apicv_inhibit {
* AVIC is disabled because SEV doesn't support it.
*/
APICV_INHIBIT_REASON_SEV,
+
+ /*
+ * Due to sharing page tables across vCPUs, the xAPIC memslot must be
+ * inhibited if any vCPU has x2APIC enabled. Note, this is a "partial"
+ * inhibit; APICv can still be activated, but KVM mustn't retain/create
+ * SPTEs for the APIC access page. Like the APIC ID and APIC base
+ * inhibits, this is sticky for simplicity.
+ */
+ APICV_INHIBIT_REASON_X2APIC,
Actually, shouldn't APICV_INHIBIT_REASON_X2APIC be set only when a
vCPU has x2APIC enabled on a system with _NO x2AVIC support_? For
example, .....
};
struct kvm_arch {
@@ -1903,6 +1912,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
+bool kvm_apicv_memslot_activated(struct kvm *kvm);
bool kvm_apicv_activated(struct kvm *kvm);
bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 38e9b8e5278c..d956cd37908e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2394,8 +2394,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
}
}
- if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
+ if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) {
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
+ kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_X2APIC);
+ }
.... Here, since we do not want to inhibit APICv/AVIC on a system that can
support x2AVIC, this should be set in the vendor-specific callback
function, where appropriate checks can be made.
if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
kvm_vcpu_update_apicv(vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e418ef3ecfcb..cea25552869f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4150,7 +4150,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
* when the AVIC is re-enabled.
*/
if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
- !kvm_apicv_activated(vcpu->kvm))
+ !kvm_apicv_memslot_activated(vcpu->kvm))
return RET_PF_EMULATE;
}
....
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d7374d768296..6ab9088c2531 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9379,15 +9379,29 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
-bool kvm_apicv_activated(struct kvm *kvm)
+bool kvm_apicv_memslot_activated(struct kvm *kvm)
{
return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
}
+
+static unsigned long kvm_apicv_get_inhibit_reasons(struct kvm *kvm)
+{
+ /*
+ * x2APIC only needs to "inhibit" the MMIO region, all other aspects of
+ * APICv can continue to be utilized.
+ */
+ return READ_ONCE(kvm->arch.apicv_inhibit_reasons) & ~APICV_INHIBIT_REASON_X2APIC;
+}
+
+bool kvm_apicv_activated(struct kvm *kvm)
+{
+ return !kvm_apicv_get_inhibit_reasons(kvm);
+}
EXPORT_SYMBOL_GPL(kvm_apicv_activated);
bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
{
- ulong vm_reasons = READ_ONCE(vcpu->kvm->arch.apicv_inhibit_reasons);
+ ulong vm_reasons = kvm_apicv_get_inhibit_reasons(vcpu->kvm);
ulong vcpu_reasons = static_call(kvm_x86_vcpu_get_apicv_inhibit_reasons)(vcpu);
return (vm_reasons | vcpu_reasons) == 0;
@@ -10122,7 +10136,15 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
set_or_clear_apicv_inhibit(&new, reason, set);
- if (!!old != !!new) {
+ /*
+ * If the overall "is APICv activated" status is unchanged, simply add
+ * or remove the inhibit from the pile. x2APIC is an exception, as it
+ * is a partial inhibit (only blocks SPTEs for the APIC access page).
+ * If x2APIC is the only inhibit in either the old or the new set, then
+ * vCPUs need to be kicked to transition between partially-inhibited
+ * and fully-inhibited.
+ */
+ if ((!!old != !!new) || old == X2APIC_ENABLE || new == X2APIC_ENABLE) {
Why are we comparing APICV inhibit reasons (old, new) with X2APIC_ENABLE
here? Do you mean to compare with APICV_INHIBIT_REASON_X2APIC?
Thanks,
Suravee
/*
* Kick all vCPUs before setting apicv_inhibit_reasons to avoid
* false positives in the sanity check WARN in svm_vcpu_run().
@@ -10137,7 +10159,12 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
*/
kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
kvm->arch.apicv_inhibit_reasons = new;
- if (new) {
+
+ /*
+ * Zap SPTEs for the APIC access page if APICv is newly
+ * inhibited (partially or fully).
+ */
+ if (new && !old) {
unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
kvm_zap_gfn_range(kvm, gfn, gfn+1);
}