Whenever KVM knows the page role flags have changed, it needs to drop the current MMU root and possibly load one from the prev_roots cache. Currently it is papering over some overly simplistic code by just dropping _all_ roots, so that the root will be reloaded by kvm_mmu_reload, but this has bad performance for the TDP MMU (which drops the whole of the page tables when freeing a root, without the performance safety net of a hash table). To do this, KVM needs to do a mere kvm_mmu_update_root call from kvm_mmu_reset_context. Introduce a new request bit so that the call can be delayed until after a possible KVM_REQ_MMU_RELOAD, which would kill all hopes of finding a cached PGD. Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/x86.c | 13 +++++++++++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 319ac0918aa2..532cda546eb9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -102,6 +102,7 @@ #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_MMU_UPDATE_ROOT KVM_ARCH_REQ(31) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 2386fadae9ed..8e6e62d8df36 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -498,7 +498,7 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, kvm_init_mmu(vcpu); if (!nested_npt) - kvm_mmu_update_root(vcpu); + kvm_make_request(KVM_REQ_MMU_UPDATE_ROOT, vcpu); return 0; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2dbd7a9ada84..c3595bc0a02d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ 
b/arch/x86/kvm/vmx/nested.c @@ -1133,7 +1133,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, kvm_init_mmu(vcpu); if (!nested_ept) - kvm_mmu_update_root(vcpu); + kvm_make_request(KVM_REQ_MMU_UPDATE_ROOT, vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9800c8883a48..9043548e6baf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1189,7 +1189,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) return 1; if (cr3 != kvm_read_cr3(vcpu)) - kvm_mmu_update_root(vcpu); + kvm_make_request(KVM_REQ_MMU_UPDATE_ROOT, vcpu); vcpu->arch.cr3 = cr3; kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3); @@ -9835,8 +9835,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } } - if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) + if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { kvm_mmu_unload(vcpu); + + /* + * Dropping all roots leaves no hope for loading a cached + * one. Let kvm_mmu_reload build a new one. + */ + kvm_clear_request(KVM_REQ_MMU_UPDATE_ROOT, vcpu); + } if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) __kvm_migrate_timers(vcpu); if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) @@ -9848,6 +9855,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(r)) goto out; } + if (kvm_check_request(KVM_REQ_MMU_UPDATE_ROOT, vcpu)) + kvm_mmu_update_root(vcpu); if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu)) -- 2.31.1