For implementation mmu_notifier_{test,clear}_young, the KVM memslot walker used to take the MMU lock for us. Now make the architectures take it themselves. Don't relax locking for any architecture except powerpc e500; its implementations of kvm_age_gfn and kvm_test_age_gfn simply return false, so there is no need to grab the KVM MMU lock. Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx> --- arch/arm64/kvm/mmu.c | 30 ++++++++++++++++++++++-------- arch/loongarch/kvm/mmu.c | 20 +++++++++++++++----- arch/mips/kvm/mmu.c | 21 ++++++++++++++++----- arch/powerpc/kvm/book3s.c | 14 ++++++++++++-- arch/riscv/kvm/mmu.c | 26 ++++++++++++++++++++------ arch/x86/kvm/mmu/mmu.c | 8 ++++++++ virt/kvm/kvm_main.c | 4 ++-- 7 files changed, 95 insertions(+), 28 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8bcab0cc3fe9..8337009dde77 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1773,25 +1773,39 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { u64 size = (range->end - range->start) << PAGE_SHIFT; + bool young = false; + + write_lock(&kvm->mmu_lock); if (!kvm->arch.mmu.pgt) - return false; + goto out; - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, - range->start << PAGE_SHIFT, - size, true); + young = kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, + range->start << PAGE_SHIFT, + size, true); + +out: + write_unlock(&kvm->mmu_lock); + return young; } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { u64 size = (range->end - range->start) << PAGE_SHIFT; + bool young = false; + + write_lock(&kvm->mmu_lock); if (!kvm->arch.mmu.pgt) - return false; + goto out; - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, - range->start << PAGE_SHIFT, - size, false); + young = kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, + range->start << PAGE_SHIFT, + size, false); + +out: + write_unlock(&kvm->mmu_lock); + return young; } phys_addr_t kvm_mmu_get_httbr(void) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 98883aa23ab8..5eb262bcf6b0 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -497,24 +497,34 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_ptw_ctx ctx; + bool young; + + spin_lock(&kvm->mmu_lock); ctx.flag = 0; ctx.ops = kvm_mkold_pte; kvm_ptw_prepare(kvm, &ctx); - return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT, + young = kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT, range->end << PAGE_SHIFT, &ctx); + + spin_unlock(&kvm->mmu_lock); + return young; } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { gpa_t gpa = range->start << PAGE_SHIFT; - kvm_pte_t *ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); + kvm_pte_t *ptep; + bool young; - if (ptep && kvm_pte_present(NULL, ptep) && kvm_pte_young(*ptep)) - return true; + spin_lock(&kvm->mmu_lock); + ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); - return false; + young = ptep && kvm_pte_present(NULL, ptep) && kvm_pte_young(*ptep); + + spin_unlock(&kvm->mmu_lock); + return young; } /* diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index c17157e700c0..db3b7cf22db1 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -446,17 +446,28 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end); + bool young; + + spin_lock(&kvm->mmu_lock); + young = kvm_mips_mkold_gpa_pt(kvm, range->start, range->end); + spin_unlock(&kvm->mmu_lock); + return young; } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { gpa_t gpa = range->start << PAGE_SHIFT; - pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + pte_t *gpa_pte; + bool young = false; - if (!gpa_pte) - return false; - return pte_young(*gpa_pte); + spin_lock(&kvm->mmu_lock); + gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + + if (gpa_pte) + young = pte_young(*gpa_pte); + + spin_unlock(&kvm->mmu_lock); + return young; } /** diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ff6c38373957..f503ab9ac3a5 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -887,12 +887,22 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - return kvm->arch.kvm_ops->age_gfn(kvm, range); + bool young; + + spin_lock(&kvm->mmu_lock); + young = kvm->arch.kvm_ops->age_gfn(kvm, range); + spin_unlock(&kvm->mmu_lock); + return young; } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - return kvm->arch.kvm_ops->test_age_gfn(kvm, range); + bool young; + + spin_lock(&kvm->mmu_lock); + young = kvm->arch.kvm_ops->test_age_gfn(kvm, range); + spin_unlock(&kvm->mmu_lock); + return young; } int kvmppc_core_init_vm(struct kvm *kvm) diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index b63650f9b966..c78abe8041fb 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -555,17 +555,24 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) pte_t *ptep; u32 ptep_level = 0; u64 size = (range->end - range->start) << PAGE_SHIFT; + bool young = false; + + spin_lock(&kvm->mmu_lock); if (!kvm->arch.pgd) - return false; + goto out; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT, &ptep, &ptep_level)) - return false; + goto out; + + young = ptep_test_and_clear_young(NULL, 0, ptep); - return ptep_test_and_clear_young(NULL, 0, ptep); +out: + spin_unlock(&kvm->mmu_lock); + return young; } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) @@ -573,17 +580,24 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) pte_t *ptep; u32 ptep_level = 0; u64 size = (range->end - range->start) << PAGE_SHIFT; + bool young = false; + + spin_lock(&kvm->mmu_lock); if (!kvm->arch.pgd) - return false; + goto out; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT, &ptep, &ptep_level)) - return false; + goto out; + + young = pte_young(ptep_get(ptep)); - return pte_young(ptep_get(ptep)); +out: + spin_unlock(&kvm->mmu_lock); + return young; } int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 662f62dfb2aa..6a2a557c2c31 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1630,12 +1630,16 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; + write_lock(&kvm->mmu_lock); + if (kvm_memslots_have_rmaps(kvm)) young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_age_gfn_range(kvm, range); + write_unlock(&kvm->mmu_lock); + return young; } @@ -1643,12 +1647,16 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { bool young = false; + write_lock(&kvm->mmu_lock); + if (kvm_memslots_have_rmaps(kvm)) young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap); if (tdp_mmu_enabled) young |= kvm_tdp_mmu_test_age_gfn(kvm, range); + write_unlock(&kvm->mmu_lock); + return young; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d197b6725cb3..8d2d3acf18d8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -901,7 +901,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, * more sophisticated heuristic later. */ return kvm_handle_hva_range_no_flush(mn, start, end, - kvm_age_gfn, false); + kvm_age_gfn, true); } static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, @@ -911,7 +911,7 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, trace_kvm_test_age_hva(address); return kvm_handle_hva_range_no_flush(mn, address, address + 1, - kvm_test_age_gfn, false); + kvm_test_age_gfn, true); } static void kvm_mmu_notifier_release(struct mmu_notifier *mn, -- 2.45.1.288.g0e0cd299f1-goog