On Fri, Aug 9, 2024 at 12:44 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote: > > DO NOT MERGE, yet... > > Cc: James Houghton <jthoughton@xxxxxxxxxx> > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > --- > arch/x86/kvm/mmu/mmu.c | 63 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 59 insertions(+), 4 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index 48e8608c2738..9df6b465de06 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -995,13 +995,11 @@ static void kvm_rmap_unlock(struct kvm_rmap_head *rmap_head, > * locking is the same, but the caller is disallowed from modifying the rmap, > * and so the unlock flow is a nop if the rmap is/was empty. > */ > -__maybe_unused > static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head) > { > return __kvm_rmap_lock(rmap_head); > } > > -__maybe_unused > static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head, > unsigned long old_val) > { > @@ -1743,8 +1741,53 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot, > __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access); > } > > -static bool kvm_rmap_age_gfn_range(struct kvm *kvm, > - struct kvm_gfn_range *range, bool test_only) > +static bool kvm_rmap_age_gfn_range_lockless(struct kvm *kvm, > + struct kvm_gfn_range *range, > + bool test_only) > +{ > + struct kvm_rmap_head *rmap_head; > + struct rmap_iterator iter; > + unsigned long rmap_val; > + bool young = false; > + u64 *sptep; > + gfn_t gfn; > + int level; > + u64 spte; > + > + for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { > + for (gfn = range->start; gfn < range->end; > + gfn += KVM_PAGES_PER_HPAGE(level)) { > + rmap_head = gfn_to_rmap(gfn, level, range->slot); > + rmap_val = kvm_rmap_lock_readonly(rmap_head); > + > + for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) { > + if (!is_accessed_spte(spte)) > + continue; > + > + if (test_only) { > + kvm_rmap_unlock_readonly(rmap_head, rmap_val); > + return true; > + } > + > + /* > + * Marking SPTEs for access tracking outside of > + * mmu_lock is unsupported. Report the page as > + * young, but otherwise leave it as-is. Just for my own understanding, what's the main reason why it's unsafe to mark PTEs for access tracking outside the mmu_lock? > + */ > + if (spte_ad_enabled(spte)) > + clear_bit((ffs(shadow_accessed_mask) - 1), > + (unsigned long *)sptep); I feel like it'd be kinda nice to de-duplicate this clear_bit() piece with the one in kvm_rmap_age_gfn_range(). > + young = true; > + } > + > + kvm_rmap_unlock_readonly(rmap_head, rmap_val); > + } > + } > + return young; > +} > + > +static bool __kvm_rmap_age_gfn_range(struct kvm *kvm, > + struct kvm_gfn_range *range, bool test_only) > { > struct slot_rmap_walk_iterator iterator; > struct rmap_iterator iter; > @@ -1783,6 +1826,18 @@ static bool kvm_rmap_age_gfn_range(struct kvm *kvm, > return young; > } > > + > +static bool kvm_rmap_age_gfn_range(struct kvm *kvm, > + struct kvm_gfn_range *range, bool test_only) > +{ > + /* FIXME: This also needs to be guarded with something like range->fast_only. */ > + if (kvm_ad_enabled()) I expect this to be something like `if (kvm_ad_enabled() || range->fast_only)`. With MGLRU, that means the pages will always be the last candidates for eviction, though it is still possible for them to be evicted (though I think this would basically never happen). I think this is fine. I think the only other possible choice is to record/return 'not young'/false instead of 'young'/true if the spte is young but !spte_ad_enabled(). That doesn't seem to be obviously better, though we *will* get correct age information at eviction time, when !range->fast_only, at which point the page will not be evicted, and Accessed will be cleared. > + return kvm_rmap_age_gfn_range_lockless(kvm, range, test_only); > + > + lockdep_assert_held_write(&kvm->mmu_lock); > + return __kvm_rmap_age_gfn_range(kvm, range, test_only); > +} > + > bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) > { > bool young = false; > -- > 2.46.0.76.ge559c4bf1a-goog >