When clearing TDP MMU pages that have been disconnected from the paging
structure root, set the SPTEs to a special non-present value that will
not be overwritten by other threads. This is needed to prevent races in
which a thread is clearing a disconnected page table while another
thread, having already acquired a pointer to that memory, installs a
mapping in an already-cleared entry. Such races can lead to memory
leaks and accounting errors.

Reviewed-by: Peter Feiner <pfeiner@xxxxxxxxxx>
Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5c9d053000ad..45160ff84e91 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -333,13 +333,14 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,
 {
 	struct kvm_mmu_page *sp;
 	gfn_t gfn;
+	gfn_t base_gfn;
 	int level;
 	u64 *sptep;
 	u64 old_child_spte;
 	int i;
 
 	sp = sptep_to_sp(pt);
-	gfn = sp->gfn;
+	base_gfn = sp->gfn;
 	level = sp->role.level;
 
 	trace_kvm_mmu_prepare_zap_page(sp);
@@ -348,16 +349,38 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
 		sptep = pt + i;
+		gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
 
 		if (atomic) {
-			old_child_spte = xchg(sptep, 0);
+			/*
+			 * Set the SPTE to a nonpresent value that other
+			 * threads will not overwrite. If the SPTE is already
+			 * frozen, another thread handling a page fault could
+			 * overwrite it, so retry until this thread is the one
+			 * that sets the SPTE from non-frozen -> frozen.
+			 */
+			for (;;) {
+				old_child_spte = xchg(sptep, FROZEN_SPTE);
+				if (old_child_spte != FROZEN_SPTE)
+					break;
+				cpu_relax();
+			}
 		} else {
 			old_child_spte = READ_ONCE(*sptep);
-			WRITE_ONCE(*sptep, 0);
+
+			/*
+			 * Setting the SPTE to FROZEN_SPTE is not strictly
+			 * necessary here as the MMU lock should stop other
+			 * threads from concurrently modifying this SPTE.
+			 * Using FROZEN_SPTE keeps the atomic and
+			 * non-atomic cases consistent and simplifies the
+			 * function.
+			 */
+			WRITE_ONCE(*sptep, FROZEN_SPTE);
 		}
-		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
-				    gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
-				    old_child_spte, 0, level - 1, atomic);
+		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
+				    old_child_spte, FROZEN_SPTE, level - 1,
+				    atomic);
 	}
 
 	kvm_flush_remote_tlbs_with_address(kvm, gfn,
-- 
2.30.0.284.gd98b1dd5eaa7-goog
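
P.S. For readers unfamiliar with the freeze pattern, here is a minimal
standalone userspace sketch of the atomic path: exchange in the frozen
marker, and if the old value was already the marker, spin until the
competing thread finishes. This is not kernel code: __atomic_exchange_n()
stands in for the kernel's xchg(), the FROZEN_SPTE value below is made up
for illustration, and freeze_spte() is a hypothetical helper, not a
function from this patch.

/* freeze_demo.c: build with `gcc -O2 freeze_demo.c` */
#include <stdint.h>
#include <stdio.h>

/* Illustrative non-present marker; the kernel defines its own value. */
#define FROZEN_SPTE ((uint64_t)0x5a0)

/*
 * Freeze one SPTE slot, spinning while another thread holds it frozen.
 * Returns the prior (non-frozen) SPTE value, which the caller then
 * hands to its change-handling logic.
 */
static uint64_t freeze_spte(uint64_t *sptep)
{
	uint64_t old;

	for (;;) {
		old = __atomic_exchange_n(sptep, FROZEN_SPTE,
					  __ATOMIC_SEQ_CST);
		if (old != FROZEN_SPTE)
			return old;
		/* Another thread froze the slot first; wait it out. */
	}
}

int main(void)
{
	uint64_t spte = 0x123000 | 1;	/* fake present SPTE */

	printf("old SPTE: 0x%llx\n",
	       (unsigned long long)freeze_spte(&spte));
	printf("slot now: 0x%llx\n", (unsigned long long)spte);
	return 0;
}

The point of the loop is that only one thread can ever observe a given
non-frozen value: whichever thread's exchange returns something other
than the marker owns the old SPTE and is responsible for processing it,
which is what makes the double-processing (and hence the leak and
accounting) race impossible.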