Re: [PATCH 3/3] kvm: mmu: Separate pte generation from set_spte

Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> · Wed, 05 Feb 2020 14:52:32 +0100

Ben Gardon <bgardon@xxxxxxxxxx> writes:

> Separate the functions for generating leaf page table entries from the
> function that inserts them into the paging structure. This refactoring
> will facilitate changes to the MMU sychronization model to use atomic
> compare / exchanges (which are not guaranteed to succeed) instead of a
> monolithic MMU lock.
>
> No functional change expected.
>
> Tested by running kvm-unit-tests on an Intel Haswell machine. This
> commit introduced no new failures.
>
> This commit can be viewed in Gerrit at:
> 	https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2360
>
> Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
> Reviewed-by: Peter Shier <pshier@xxxxxxxxxx>
> ---
>  arch/x86/kvm/mmu/mmu.c | 52 +++++++++++++++++++++++++++---------------
>  1 file changed, 34 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index b81010d0edae1..9239ad5265dc6 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3000,20 +3000,14 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
>  #define SET_SPTE_WRITE_PROTECTED_PT	BIT(0)
>  #define SET_SPTE_NEED_REMOTE_TLB_FLUSH	BIT(1)
>  
> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> -		    unsigned int pte_access, int level,
> -		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> -		    bool can_unsync, bool host_writable)
> +static u64 make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
> +		     gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool speculative,
> +		     bool can_unsync, bool host_writable, bool ad_disabled,
> +		     int *ret)

With such a long parameter list we may think about passing a pointer to
a structure instead (common for make_spte()/set_spte())

>  {
>  	u64 spte = 0;
> -	int ret = 0;
> -	struct kvm_mmu_page *sp;
> -
> -	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> -		return 0;
>  
> -	sp = page_header(__pa(sptep));
> -	if (sp_ad_disabled(sp))
> +	if (ad_disabled)
>  		spte |= SPTE_AD_DISABLED_MASK;
>  	else if (kvm_vcpu_ad_need_write_protect(vcpu))
>  		spte |= SPTE_AD_WRPROT_ONLY_MASK;
> @@ -3066,27 +3060,49 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  		 * is responsibility of mmu_get_page / kvm_sync_page.
>  		 * Same reasoning can be applied to dirty page accounting.
>  		 */
> -		if (!can_unsync && is_writable_pte(*sptep))
> -			goto set_pte;
> +		if (!can_unsync && is_writable_pte(old_spte))
> +			return spte;
>  
>  		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
>  			pgprintk("%s: found shadow page for %llx, marking ro\n",
>  				 __func__, gfn);
> -			ret |= SET_SPTE_WRITE_PROTECTED_PT;
> +			*ret |= SET_SPTE_WRITE_PROTECTED_PT;
>  			pte_access &= ~ACC_WRITE_MASK;
>  			spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
>  		}
>  	}
>  
> -	if (pte_access & ACC_WRITE_MASK) {
> -		kvm_vcpu_mark_page_dirty(vcpu, gfn);
> +	if (pte_access & ACC_WRITE_MASK)
>  		spte |= spte_shadow_dirty_mask(spte);
> -	}
>  
>  	if (speculative)
>  		spte = mark_spte_for_access_track(spte);
>  
> -set_pte:
> +	return spte;
> +}
> +
> +static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> +		    unsigned int pte_access, int level,
> +		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> +		    bool can_unsync, bool host_writable)
> +{
> +	u64 spte = 0;
> +	struct kvm_mmu_page *sp;
> +	int ret = 0;
> +
> +	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> +		return 0;
> +
> +	sp = page_header(__pa(sptep));
> +
> +	spte = make_spte(vcpu, pte_access, level, gfn, pfn, *sptep, speculative,
> +			 can_unsync, host_writable, sp_ad_disabled(sp), &ret);

I'm probably missing something, but in make_spte() I see just one place
which writes to '*ret' so at the end, this is either
SET_SPTE_WRITE_PROTECTED_PT or 0 (which we got only because we
initialize it to 0 in set_spte()). Unless this is preparation to some
other change, I don't see much value in the complication.

Can we actually reverse the logic, pass 'spte' by reference and return
'ret'?

> +	if (!spte)
> +		return 0;
> +
> +	if (spte & PT_WRITABLE_MASK)
> +		kvm_vcpu_mark_page_dirty(vcpu, gfn);
> +
>  	if (mmu_spte_update(sptep, spte))
>  		ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
>  	return ret;

-- 
Vitaly