On Thu, Sep 26, 2019 at 04:17:58PM -0700, Ben Gardon wrote: > Separate the functions for generating leaf page table entries from the > function that inserts them into the paging structure. This refactoring > will allow changes to the MMU sychronization model to use atomic > compare / exchanges (which are not guaranteed to succeed) instead of a > monolithic MMU lock. > > Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx> > --- > arch/x86/kvm/mmu.c | 93 ++++++++++++++++++++++++++++------------------ > 1 file changed, 57 insertions(+), 36 deletions(-) > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index 781c2ca7455e3..7e5ab9c6e2b09 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -2964,21 +2964,15 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) > #define SET_SPTE_WRITE_PROTECTED_PT BIT(0) > #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1) > > -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, > - unsigned pte_access, int level, > - gfn_t gfn, kvm_pfn_t pfn, bool speculative, > - bool can_unsync, bool host_writable) > +static int generate_pte(struct kvm_vcpu *vcpu, unsigned pte_access, int level, Similar comment on "generate". Note, I don't necessarily like the names get_mmio_spte_value() or get_spte_value() either as they could be misinterpreted as reading the value from memory. Maybe calc_{mmio_}spte_value()? > + gfn_t gfn, kvm_pfn_t pfn, u64 old_pte, bool speculative, > + bool can_unsync, bool host_writable, bool ad_disabled, > + u64 *ptep) > { > - u64 spte = 0; > + u64 pte; Renames and unrelated refactoring (leaving the variable uninitialized and setting it directdly to shadow_present_mask) belong in separate patches. The renames especially make this patch much more difficult to review. And, I disagree with the rename, I think it's important to keep the "spte" nomenclature, even though it's a bit of a misnomer for TDP entries, so that it is easy to differentiate data that is coming from the host PTEs versus data that is for KVM's MMU. > int ret = 0; > - struct kvm_mmu_page *sp; > - > - if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) > - return 0; > > - sp = page_header(__pa(sptep)); > - if (sp_ad_disabled(sp)) > - spte |= shadow_acc_track_value; > + *ptep = 0; > > /* > * For the EPT case, shadow_present_mask is 0 if hardware > @@ -2986,36 +2980,39 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, > * ACC_USER_MASK and shadow_user_mask are used to represent > * read access. See FNAME(gpte_access) in paging_tmpl.h. > */ > - spte |= shadow_present_mask; > + pte = shadow_present_mask; > + > + if (ad_disabled) > + pte |= shadow_acc_track_value; > + > if (!speculative) > - spte |= spte_shadow_accessed_mask(spte); > + pte |= spte_shadow_accessed_mask(pte); > > if (pte_access & ACC_EXEC_MASK) > - spte |= shadow_x_mask; > + pte |= shadow_x_mask; > else > - spte |= shadow_nx_mask; > + pte |= shadow_nx_mask; > > if (pte_access & ACC_USER_MASK) > - spte |= shadow_user_mask; > + pte |= shadow_user_mask; > > if (level > PT_PAGE_TABLE_LEVEL) > - spte |= PT_PAGE_SIZE_MASK; > + pte |= PT_PAGE_SIZE_MASK; > if (tdp_enabled) > - spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, > + pte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, > kvm_is_mmio_pfn(pfn)); > > if (host_writable) > - spte |= SPTE_HOST_WRITEABLE; > + pte |= SPTE_HOST_WRITEABLE; > else > pte_access &= ~ACC_WRITE_MASK; > > if (!kvm_is_mmio_pfn(pfn)) > - spte |= shadow_me_mask; > + pte |= shadow_me_mask; > > - spte |= (u64)pfn << PAGE_SHIFT; > + pte |= (u64)pfn << PAGE_SHIFT; > > if (pte_access & ACC_WRITE_MASK) { > - > /* > * Other vcpu creates new sp in the window between > * mapping_level() and acquiring mmu-lock. We can > @@ -3024,9 +3021,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, > */ > if (level > PT_PAGE_TABLE_LEVEL && > mmu_gfn_lpage_is_disallowed(vcpu, gfn, level)) > - goto done; > + return 0; > > - spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; > + pte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; > > /* > * Optimization: for pte sync, if spte was writable the hash > @@ -3034,30 +3031,54 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, > * is responsibility of mmu_get_page / kvm_sync_page. > * Same reasoning can be applied to dirty page accounting. > */ > - if (!can_unsync && is_writable_pte(*sptep)) > - goto set_pte; > + if (!can_unsync && is_writable_pte(old_pte)) { > + *ptep = pte; > + return 0; > + } > > if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { > pgprintk("%s: found shadow page for %llx, marking ro\n", > __func__, gfn); > - ret |= SET_SPTE_WRITE_PROTECTED_PT; > + ret = SET_SPTE_WRITE_PROTECTED_PT; More unnecessary refactoring. > pte_access &= ~ACC_WRITE_MASK; > - spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); > + pte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); > } > } > > - if (pte_access & ACC_WRITE_MASK) { > - kvm_vcpu_mark_page_dirty(vcpu, gfn); > - spte |= spte_shadow_dirty_mask(spte); > - } > + if (pte_access & ACC_WRITE_MASK) > + pte |= spte_shadow_dirty_mask(pte); > > if (speculative) > - spte = mark_spte_for_access_track(spte); > + pte = mark_spte_for_access_track(pte); > + > + *ptep = pte; > + return ret; > +} > + > +static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, > + int level, gfn_t gfn, kvm_pfn_t pfn, bool speculative, > + bool can_unsync, bool host_writable) > +{ > + u64 spte; > + int ret; > + struct kvm_mmu_page *sp; > + > + if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) > + return 0; > + > + sp = page_header(__pa(sptep)); > + > + ret = generate_pte(vcpu, pte_access, level, gfn, pfn, *sptep, > + speculative, can_unsync, host_writable, > + sp_ad_disabled(sp), &spte); Yowsers, that's a big prototype. This is something that came up in an unrelated internal discussion. I wonder if it would make sense to define a struct to hold all of the data needed to insert an spte and pass that on the stack isntead of having a bajillion parameters. Just spitballing, no idea if it's feasible and/or reasonable. > + if (!spte) > + return 0; > + > + if (spte & PT_WRITABLE_MASK) > + kvm_vcpu_mark_page_dirty(vcpu, gfn); > > -set_pte: > if (mmu_spte_update(sptep, spte)) > ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH; > -done: > return ret; > } > > -- > 2.23.0.444.g18eeb5a265-goog >