On Fri, Jun 11, 2021 at 4:57 PM David Matlack <dmatlack@xxxxxxxxxx> wrote:
>
> This commit enables the fast_page_fault handler to work when the TDP MMU
> is enabled by leveraging the new walk_shadow_page_lockless* API to
> collect page walks independent of the TDP MMU.
>
> fast_page_fault was already using
> walk_shadow_page_lockless_{begin,end}(), we just have to change the
> actual walk to use walk_shadow_page_lockless() which does the right
> thing if the TDP MMU is in use.
>
> Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx>

Adding this feature was suggested by Ben Gardon:

Suggested-by: Ben Gardon <bgardon@xxxxxxxxxx>

> ---
>  arch/x86/kvm/mmu/mmu.c | 52 +++++++++++++++++-------------------------
>  1 file changed, 21 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 765f5b01768d..5562727c3699 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -657,6 +657,9 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
>          local_irq_enable();
>  }
>
> +static bool walk_shadow_page_lockless(struct kvm_vcpu *vcpu, u64 addr,
> +                                      struct shadow_page_walk *walk);
> +
>  static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
>  {
>          int r;
> @@ -2967,14 +2970,9 @@ static bool page_fault_can_be_fast(u32 error_code)
>   * Returns true if the SPTE was fixed successfully. Otherwise,
>   * someone else modified the SPTE from its original value.
>   */
> -static bool
> -fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> -                        u64 *sptep, u64 old_spte, u64 new_spte)
> +static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, gpa_t gpa,
> +                                    u64 *sptep, u64 old_spte, u64 new_spte)
>  {
> -        gfn_t gfn;
> -
> -        WARN_ON(!sp->role.direct);
> -
>          /*
>           * Theoretically we could also set dirty bit (and flush TLB) here in
>           * order to eliminate unnecessary PML logging. See comments in
> @@ -2990,14 +2988,8 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>          if (cmpxchg64(sptep, old_spte, new_spte) != old_spte)
>                  return false;
>
> -        if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) {
> -                /*
> -                 * The gfn of direct spte is stable since it is
> -                 * calculated by sp->gfn.
> -                 */
> -                gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
> -                kvm_vcpu_mark_page_dirty(vcpu, gfn);
> -        }
> +        if (is_writable_pte(new_spte) && !is_writable_pte(old_spte))
> +                kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
>
>          return true;
>  }
> @@ -3019,10 +3011,9 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
>   */
>  static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
>  {
> -        struct kvm_shadow_walk_iterator iterator;
> -        struct kvm_mmu_page *sp;
>          int ret = RET_PF_INVALID;
>          u64 spte = 0ull;
> +        u64 *sptep = NULL;
>          uint retry_count = 0;
>
>          if (!page_fault_can_be_fast(error_code))
> @@ -3031,17 +3022,19 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
>          walk_shadow_page_lockless_begin(vcpu);
>
>          do {
> +                struct shadow_page_walk walk;
>                  u64 new_spte;
>
> -                for_each_shadow_entry_lockless(vcpu, gpa, iterator, spte)
> -                        if (!is_shadow_present_pte(spte))
> -                                break;
> +                if (!walk_shadow_page_lockless(vcpu, gpa, &walk))
> +                        break;
> +
> +                spte = walk.sptes[walk.last_level];
> +                sptep = walk.spteps[walk.last_level];
>
>                  if (!is_shadow_present_pte(spte))
>                          break;
>
> -                sp = sptep_to_sp(iterator.sptep);
> -                if (!is_last_spte(spte, sp->role.level))
> +                if (!is_last_spte(spte, walk.last_level))
>                          break;
>
>                  /*
> @@ -3084,7 +3077,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
>                           *
>                           * See the comments in kvm_arch_commit_memory_region().
>                           */
> -                        if (sp->role.level > PG_LEVEL_4K)
> +                        if (walk.last_level > PG_LEVEL_4K)
>                                  break;
>                  }
>
> @@ -3098,8 +3091,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
>                   * since the gfn is not stable for indirect shadow page. See
>                   * Documentation/virt/kvm/locking.rst to get more detail.
>                   */
> -                if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte,
> -                                            new_spte)) {
> +                if (fast_pf_fix_direct_spte(vcpu, gpa, sptep, spte, new_spte)) {
>                          ret = RET_PF_FIXED;
>                          break;
>                  }
> @@ -3112,7 +3104,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
>
>          } while (true);
>
> -        trace_fast_page_fault(vcpu, gpa, error_code, iterator.sptep, spte, ret);
> +        trace_fast_page_fault(vcpu, gpa, error_code, sptep, spte, ret);
>          walk_shadow_page_lockless_end(vcpu);
>
>          return ret;
> @@ -3748,11 +3740,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
>          if (page_fault_handle_page_track(vcpu, error_code, gfn))
>                  return RET_PF_EMULATE;
>
> -        if (!is_vcpu_using_tdp_mmu(vcpu)) {
> -                r = fast_page_fault(vcpu, gpa, error_code);
> -                if (r != RET_PF_INVALID)
> -                        return r;
> -        }
> +        r = fast_page_fault(vcpu, gpa, error_code);
> +        if (r != RET_PF_INVALID)
> +                return r;
>
>          r = mmu_topup_memory_caches(vcpu, false);
>          if (r)
> --
> 2.32.0.272.g935e593368-goog
>
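For anyone reviewing this patch without the earlier patches in the series
applied, here is a rough sketch of the consumption pattern fast_page_fault()
now relies on. The helper name example_get_last_spte() is made up purely for
illustration, and the shadow_page_walk fields (sptes[], spteps[], last_level)
are inferred from the diff above, so treat this as a sketch rather than the
actual implementation:

/*
 * Illustrative only: fetch the last-level SPTE for @gpa via the lockless
 * walk, independent of whether the TDP MMU is in use. Callers must wrap
 * this in walk_shadow_page_lockless_{begin,end}(), just like
 * fast_page_fault() does.
 */
static bool example_get_last_spte(struct kvm_vcpu *vcpu, gpa_t gpa,
                                  u64 **sptepp, u64 *spte, int *level)
{
        struct shadow_page_walk walk;

        if (!walk_shadow_page_lockless(vcpu, gpa, &walk))
                return false;

        *level = walk.last_level;
        *spte = walk.sptes[walk.last_level];
        *sptepp = walk.spteps[walk.last_level];

        /* Only a present, last-level SPTE is useful to the fast path. */
        return is_shadow_present_pte(*spte) && is_last_spte(*spte, *level);
}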