Thanks a lot for the patch, Sean. I know this has been queued for quite a
while, but I just realized I have another question about kvm_mmu_pte_write():

> Remove the update_pte() shadow paging logic, which was obsoleted by
> commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
> removed.  As pointed out by Yu, KVM never write protects leaf page
> tables for the purposes of shadow paging, and instead marks their
> associated shadow page as unsync so that the guest can write PTEs at
> will.
>
> The update_pte() path, which predates the unsync logic, optimizes COW
> scenarios by refreshing leaf SPTEs when they are written, as opposed to
> zapping the SPTE, restarting the guest, and installing the new SPTE on
> the subsequent fault.  Since KVM no longer write-protects leaf page
> tables, update_pte() is unreachable and can be dropped.
>
> Reported-by: Yu Zhang <yu.c.zhang@xxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 --
>  arch/x86/kvm/mmu/mmu.c          | 49 ++-------------------------------
>  arch/x86/kvm/x86.c              |  1 -
>  3 files changed, 2 insertions(+), 51 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3d6616f6f6ef..ed575c5655dd 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -358,8 +358,6 @@ struct kvm_mmu {
>  	int (*sync_page)(struct kvm_vcpu *vcpu,
>  			 struct kvm_mmu_page *sp);
>  	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
> -	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> -			   u64 *spte, const void *pte);
>  	hpa_t root_hpa;
>  	gpa_t root_pgd;
>  	union kvm_mmu_role mmu_role;
> @@ -1031,7 +1029,6 @@ struct kvm_arch {
>  struct kvm_vm_stat {
>  	ulong mmu_shadow_zapped;
>  	ulong mmu_pte_write;
> -	ulong mmu_pte_updated;
>  	ulong mmu_pde_zapped;
>  	ulong mmu_flooded;
>  	ulong mmu_recycled;
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6d16481aa29d..3a2c25852b1f 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -1723,13 +1723,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>
> -static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
> -				 struct kvm_mmu_page *sp, u64 *spte,
> -				 const void *pte)
> -{
> -	WARN_ON(1);
> -}
> -
>  #define KVM_PAGE_ARRAY_NR 16
>
>  struct kvm_mmu_pages {
> @@ -3813,7 +3806,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
>  	context->gva_to_gpa = nonpaging_gva_to_gpa;
>  	context->sync_page = nonpaging_sync_page;
>  	context->invlpg = NULL;
> -	context->update_pte = nonpaging_update_pte;
>  	context->root_level = 0;
>  	context->shadow_root_level = PT32E_ROOT_LEVEL;
>  	context->direct_map = true;
> @@ -4395,7 +4387,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
>  	context->gva_to_gpa = paging64_gva_to_gpa;
>  	context->sync_page = paging64_sync_page;
>  	context->invlpg = paging64_invlpg;
> -	context->update_pte = paging64_update_pte;
>  	context->shadow_root_level = level;
>  	context->direct_map = false;
>  }
> @@ -4424,7 +4415,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
>  	context->gva_to_gpa = paging32_gva_to_gpa;
>  	context->sync_page = paging32_sync_page;
>  	context->invlpg = paging32_invlpg;
> -	context->update_pte = paging32_update_pte;
>  	context->shadow_root_level = PT32E_ROOT_LEVEL;
>  	context->direct_map = false;
>  }
> @@ -4506,7 +4496,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>  	context->page_fault = kvm_tdp_page_fault;
>  	context->sync_page = nonpaging_sync_page;
>  	context->invlpg = NULL;
> -	context->update_pte = nonpaging_update_pte;
>  	context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
>  	context->direct_map = true;
>  	context->get_guest_pgd = get_cr3;
> @@ -4678,7 +4667,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
>  	context->gva_to_gpa = ept_gva_to_gpa;
>  	context->sync_page = ept_sync_page;
>  	context->invlpg = ept_invlpg;
> -	context->update_pte = ept_update_pte;
>  	context->root_level = level;
>  	context->direct_map = false;
>  	context->mmu_role.as_u64 = new_role.as_u64;
> @@ -4826,19 +4814,6 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_unload);
>
> -static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
> -				  struct kvm_mmu_page *sp, u64 *spte,
> -				  const void *new)
> -{
> -	if (sp->role.level != PG_LEVEL_4K) {
> -		++vcpu->kvm->stat.mmu_pde_zapped;
> -		return;
> -	}
> -
> -	++vcpu->kvm->stat.mmu_pte_updated;
> -	vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
> -}
> -
>  static bool need_remote_flush(u64 old, u64 new)
>  {
>  	if (!is_shadow_present_pte(old))
> @@ -4954,22 +4929,6 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
>  	return spte;
>  }
>
> -/*
> - * Ignore various flags when determining if a SPTE can be immediately
> - * overwritten for the current MMU.
> - *  - level: explicitly checked in mmu_pte_write_new_pte(), and will never
> - *           match the current MMU role, as MMU's level tracks the root level.
> - *  - access: updated based on the new guest PTE
> - *  - quadrant: handled by get_written_sptes()
> - *  - invalid: always false (loop only walks valid shadow pages)
> - */
> -static const union kvm_mmu_page_role role_ign = {
> -	.level = 0xf,
> -	.access = 0x7,
> -	.quadrant = 0x3,
> -	.invalid = 0x1,
> -};
> -
>  static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>  			      const u8 *new, int bytes,
>  			      struct kvm_page_track_notifier_node *node)
> @@ -5020,14 +4979,10 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>
>  		local_flush = true;
>  		while (npte--) {
> -			u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
> -
>  			entry = *spte;
>  			mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
> -			if (gentry &&
> -			    !((sp->role.word ^ base_role) & ~role_ign.word) &&
> -			    rmap_can_add(vcpu))
> -				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
> +			if (gentry && sp->role.level != PG_LEVEL_4K)

I am wondering: is there any chance sp->role.level could be PG_LEVEL_4K in
kvm_mmu_pte_write()? My previous understanding was that, since the gfns of
guest leaf page tables are never page tracked, the sp here should only have a
level greater than PG_LEVEL_4K. Did I miss anything here? Thanks! :)

B.R.
Yu

> +				++vcpu->kvm->stat.mmu_pde_zapped;
>  			if (need_remote_flush(entry, *spte))
>  				remote_flush = true;
>  			++spte;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a480804ae27a..d9f5d9acccc1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -233,7 +233,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>  	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
>  	VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
>  	VM_STAT("mmu_pte_write", mmu_pte_write),
> -	VM_STAT("mmu_pte_updated", mmu_pte_updated),
>  	VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
>  	VM_STAT("mmu_flooded", mmu_flooded),
>  	VM_STAT("mmu_recycled", mmu_recycled),
> --
> 2.30.0.284.gd98b1dd5eaa7-goog
>
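
For reference, the understanding above comes from account_shadowed() in
arch/x86/kvm/mmu/mmu.c: only shadow pages above PG_LEVEL_4K have their gfn
registered for write tracking, while leaf guest page tables take the unsync
path instead. Quoting roughly from the tree I believe this patch is based on
(so the excerpt below may not match your branch exactly):

static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;
	gfn_t gfn;

	kvm->arch.indirect_shadow_pages++;
	gfn = sp->gfn;
	slots = kvm_memslots_for_spte_role(kvm, sp->role);
	slot = __gfn_to_memslot(slots, gfn);

	/* the non-leaf shadow pages are keeping readonly. */
	if (sp->role.level > PG_LEVEL_4K)
		return kvm_slot_page_track_add_page(kvm, slot, gfn,
						    KVM_PAGE_TRACK_WRITE);

	kvm_mmu_gfn_disallow_lpage(slot, gfn);
}

So my expectation was that kvm_mmu_pte_write(), being the page-track write
notifier, is only invoked for gfns backing upper-level guest page tables,
and hence every sp it finds for the written gfn should be above PG_LEVEL_4K.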