On Sun, Aug 07, 2022 at 03:01:24PM -0700, isaku.yamahata@xxxxxxxxx wrote:
> From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
>
> Refactor tdp_mmu_alloc_sp() and tdp_mmu_init_sp and eliminate
> tdp_mmu_init_child_sp().  Currently tdp_mmu_init_sp() (or
> tdp_mmu_init_child_sp()) sets kvm_mmu_page.role after tdp_mmu_alloc_sp()
> allocating struct kvm_mmu_page and its page table page.  This patch makes
> tdp_mmu_alloc_sp() initialize kvm_mmu_page.role instead of
> tdp_mmu_init_sp().
>
> To handle private page tables, argument of is_private needs to be passed
> down.  Given that already page level is passed down, it would be
> cumbersome to add one more parameter about sp.  Instead replace the level
> argument with union kvm_mmu_page_role.  Thus the number of argument won't
> be increased and more info about sp can be passed down.

Please update the description: no level argument is replaced in this patch.

>
> For private sp, secure page table will be also allocated in addition to
> struct kvm_mmu_page and page table (spt member).  The allocation functions
> (tdp_mmu_alloc_sp() and __tdp_mmu_alloc_sp_for_split()) need to know if
> the allocation is for the conventional page table or private page table.
> Pass union kvm_mmu_role to those functions and initialize role member of
> struct kvm_mmu_page.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> ---
>  arch/x86/kvm/mmu/tdp_iter.h | 12 ++++++++++
>  arch/x86/kvm/mmu/tdp_mmu.c  | 45 +++++++++++++++++--------------------
>  2 files changed, 32 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
> index f0af385c56e0..9e56a5b1024c 100644
> --- a/arch/x86/kvm/mmu/tdp_iter.h
> +++ b/arch/x86/kvm/mmu/tdp_iter.h
> @@ -115,4 +115,16 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
>  void tdp_iter_next(struct tdp_iter *iter);
>  void tdp_iter_restart(struct tdp_iter *iter);
>
> +static inline union kvm_mmu_page_role tdp_iter_child_role(struct tdp_iter *iter)
> +{
> +	union kvm_mmu_page_role child_role;
> +	struct kvm_mmu_page *parent_sp;
> +
> +	parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
> +
> +	child_role = parent_sp->role;
> +	child_role.level--;
> +	return child_role;
> +}
> +
>  #endif /* __KVM_X86_MMU_TDP_ITER_H */
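Not an issue with this patch, but maybe worth a comment on the helper:
tdp_iter_child_role() does rcu_dereference(iter->sptep), so it is only safe
inside an RCU read-side critical section, which the TDP MMU walk paths below
already guarantee.  For readers following the series, the allocate-then-init
pattern this patch establishes is simply (a minimal sketch using only
functions from this patch, with the surrounding walk loop elided):

	struct kvm_mmu_page *sp;

	/*
	 * Under rcu_read_lock(), walking with a struct tdp_iter *iter:
	 * the child inherits the parent's full role, e.g. any is_private
	 * bit a later patch may add, with only role.level decremented.
	 */
	sp = tdp_mmu_alloc_sp(vcpu, tdp_iter_child_role(iter));
	tdp_mmu_init_sp(sp, iter->sptep, iter->gfn);
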
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 90b468a3a1a2..ce69535754ff 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -271,22 +271,28 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
>  		 kvm_mmu_page_as_id(_root) != _as_id) {		\
>  	} else
>
> -static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
> +static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu,
> +					     union kvm_mmu_page_role role)
>  {
>  	struct kvm_mmu_page *sp;
>
>  	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
>  	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
> +	sp->role = role;
>
>  	return sp;
>  }
>
>  static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
> -			    gfn_t gfn, union kvm_mmu_page_role role)
> +			    gfn_t gfn)
>  {
>  	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>
> -	sp->role = role;
> +	/*
> +	 * role must be set before calling this function.  At least role.level
> +	 * is not 0 (PG_LEVEL_NONE).
> +	 */
> +	WARN_ON(!sp->role.word);
>  	sp->gfn = gfn;
>  	sp->ptep = sptep;
>  	sp->tdp_mmu_page = true;
> @@ -294,20 +300,6 @@ static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
>  	trace_kvm_mmu_get_page(sp, true);
>  }
>
> -static void tdp_mmu_init_child_sp(struct kvm_mmu_page *child_sp,
> -				  struct tdp_iter *iter)
> -{
> -	struct kvm_mmu_page *parent_sp;
> -	union kvm_mmu_page_role role;
> -
> -	parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
> -
> -	role = parent_sp->role;
> -	role.level--;
> -
> -	tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role);
> -}
> -
>  hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
>  {
>  	union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
> @@ -326,8 +318,8 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
>  		goto out;
>  	}
>
> -	root = tdp_mmu_alloc_sp(vcpu);
> -	tdp_mmu_init_sp(root, NULL, 0, role);
> +	root = tdp_mmu_alloc_sp(vcpu, role);
> +	tdp_mmu_init_sp(root, NULL, 0);
>
>  	refcount_set(&root->tdp_mmu_root_count, 1);
>
> @@ -1154,8 +1146,8 @@ static int tdp_mmu_populate_nonleaf(
>  	WARN_ON(is_shadow_present_pte(iter->old_spte));
>  	WARN_ON(is_removed_spte(iter->old_spte));
>
> -	sp = tdp_mmu_alloc_sp(vcpu);
> -	tdp_mmu_init_child_sp(sp, iter);
> +	sp = tdp_mmu_alloc_sp(vcpu, tdp_iter_child_role(iter));
> +	tdp_mmu_init_sp(sp, iter->sptep, iter->gfn);
>
>  	ret = tdp_mmu_link_sp(vcpu->kvm, iter, sp, account_nx, true);
>  	if (ret)
> @@ -1423,7 +1415,8 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
>  	return spte_set;
>  }
>
> -static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
> +static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(
> +	gfp_t gfp, union kvm_mmu_page_role role)
>  {
>  	struct kvm_mmu_page *sp;
>
> @@ -1433,6 +1426,7 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
>  	if (!sp)
>  		return NULL;
>
> +	sp->role = role;
>  	sp->spt = (void *)__get_free_page(gfp);
>  	if (!sp->spt) {
>  		kmem_cache_free(mmu_page_header_cache, sp);
> @@ -1446,6 +1440,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
>  							struct tdp_iter *iter,
>  							bool shared)
>  {
> +	union kvm_mmu_page_role role = tdp_iter_child_role(iter);
>  	struct kvm_mmu_page *sp;
>
>  	/*
> @@ -1457,7 +1452,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
>  	 * If this allocation fails we drop the lock and retry with reclaim
>  	 * allowed.
>  	 */
> -	sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT);
> +	sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT, role);
>  	if (sp)
>  		return sp;
>
> @@ -1469,7 +1464,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
>  	write_unlock(&kvm->mmu_lock);
>
>  	iter->yielded = true;
> -	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);
> +	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT, role);
>
>  	if (shared)
>  		read_lock(&kvm->mmu_lock);
> @@ -1488,7 +1483,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
>  	const int level = iter->level;
>  	int ret, i;
>
> -	tdp_mmu_init_child_sp(sp, iter);
> +	tdp_mmu_init_sp(sp, iter->sptep, iter->gfn);
>
>  	/*
>  	 * No need for atomics when writing to sp->spt since the page table has
> --
> 2.25.1
>
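One more thought on where this is going: with the role available at
allocation time, I assume a later patch in the series gates the secure page
table allocation on it, roughly like the hypothetical sketch below.  To be
clear, role.is_private, sp->private_spt and kvm_mmu_alloc_private_spt() are
placeholders of mine, not names from this patch:

	static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu,
						     union kvm_mmu_page_role role)
	{
		struct kvm_mmu_page *sp;

		sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
		sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
		sp->role = role;

		/* Placeholder: only private roles get a secure page table. */
		if (role.is_private)
			sp->private_spt = kvm_mmu_alloc_private_spt(vcpu, sp);

		return sp;
	}

If that is the plan, passing the full role rather than a bool keeps those
later diffs small, so the approach looks right to me.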