From: Sean Christopherson <seanjc@xxxxxxxxxx> When A/D bits are supported on sptes, it is safe to simply clear the Accessed bits. The less obvious case is marking sptes for access tracking in the non-A/D case (for EPT only). In this case, we have to be sure that it is okay for TLB entries to exist for non-present sptes. For example, when doing dirty tracking, if we come across a non-present SPTE, we need to know that we need to do a TLB invalidation. This case is already supported today (as we already support *not* doing TLBIs for clear_young(); there is a separate notifier for clearing *and* flushing, clear_flush_young()). This works today because GET_DIRTY_LOG flushes the TLB before returning to userspace. Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> Co-developed-by: James Houghton <jthoughton@xxxxxxxxxx> Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx> --- arch/x86/kvm/mmu/mmu.c | 72 +++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 71019762a28a..bdd6abf9b44e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -952,13 +952,11 @@ static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head) * locking is the same, but the caller is disallowed from modifying the rmap, * and so the unlock flow is a nop if the rmap is/was empty. */ -__maybe_unused static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head) { return __kvm_rmap_lock(rmap_head); } -__maybe_unused static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head, unsigned long old_val) { @@ -1677,37 +1675,48 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, } static bool kvm_rmap_age_gfn_range(struct kvm *kvm, - struct kvm_gfn_range *range, bool test_only) + struct kvm_gfn_range *range, + bool test_only) { - struct slot_rmap_walk_iterator iterator; + struct kvm_rmap_head *rmap_head; struct rmap_iterator iter; + unsigned long rmap_val; bool young = false; u64 *sptep; + gfn_t gfn; + int level; + u64 spte; - for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, - range->start, range->end - 1, &iterator) { - for_each_rmap_spte(iterator.rmap, &iter, sptep) { - u64 spte = *sptep; + for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { + for (gfn = range->start; gfn < range->end; + gfn += KVM_PAGES_PER_HPAGE(level)) { + rmap_head = gfn_to_rmap(gfn, level, range->slot); + rmap_val = kvm_rmap_lock_readonly(rmap_head); - if (!is_accessed_spte(spte)) - continue; + for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) { + if (!is_accessed_spte(spte)) + continue; + + if (test_only) { + kvm_rmap_unlock_readonly(rmap_head, rmap_val); + return true; + } - if (test_only) - return true; - - if (spte_ad_enabled(spte)) { - clear_bit((ffs(shadow_accessed_mask) - 1), - (unsigned long *)sptep); - } else { - /* - * WARN if mmu_spte_update() signals the need - * for a TLB flush, as Access tracking a SPTE - * should never trigger an _immediate_ flush. - */ - spte = mark_spte_for_access_track(spte); - WARN_ON_ONCE(mmu_spte_update(sptep, spte)); + if (spte_ad_enabled(spte)) + clear_bit((ffs(shadow_accessed_mask) - 1), + (unsigned long *)sptep); + else + /* + * If the following cmpxchg fails, the + * spte is being concurrently modified + * and should most likely stay young. + */ + cmpxchg64(sptep, spte, + mark_spte_for_access_track(spte)); + young = true; } - young = true; + + kvm_rmap_unlock_readonly(rmap_head, rmap_val); } } return young; @@ -1725,11 +1734,8 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (tdp_mmu_enabled) young = kvm_tdp_mmu_age_gfn_range(kvm, range); - if (kvm_has_shadow_mmu_sptes(kvm)) { - write_lock(&kvm->mmu_lock); + if (kvm_has_shadow_mmu_sptes(kvm)) young |= kvm_rmap_age_gfn_range(kvm, range, false); - write_unlock(&kvm->mmu_lock); - } return young; } @@ -1741,11 +1747,11 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (tdp_mmu_enabled) young = kvm_tdp_mmu_test_age_gfn(kvm, range); - if (!young && kvm_has_shadow_mmu_sptes(kvm)) { - write_lock(&kvm->mmu_lock); + if (young) + return young; + + if (kvm_has_shadow_mmu_sptes(kvm)) young |= kvm_rmap_age_gfn_range(kvm, range, true); - write_unlock(&kvm->mmu_lock); - } return young; } -- 2.47.0.199.ga7371fff76-goog