>--- a/arch/x86/kvm/mmu/mmu.c >+++ b/arch/x86/kvm/mmu/mmu.c >@@ -3717,7 +3717,12 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) > goto out_unlock; > > if (tdp_mmu_enabled) { >- root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu); >+ if (kvm_gfn_shared_mask(vcpu->kvm) && >+ !VALID_PAGE(mmu->private_root_hpa)) { >+ root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu, true); >+ mmu->private_root_hpa = root; just mmu->private_root_hpa = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu, true); >+ } >+ root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu, false); > mmu->root.hpa = root; ditto > } else if (shadow_root_level >= PT64_ROOT_4LEVEL) { > root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level); >@@ -4627,7 +4632,7 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > if (kvm_mmu_honors_guest_mtrrs(vcpu->kvm)) { > for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) { > int page_num = KVM_PAGES_PER_HPAGE(fault->max_level); >- gfn_t base = gfn_round_for_level(fault->gfn, >+ gfn_t base = gfn_round_for_level(gpa_to_gfn(fault->addr), > fault->max_level); ... > > if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num)) >@@ -4662,6 +4667,7 @@ int kvm_mmu_map_tdp_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, > }; > > WARN_ON_ONCE(!vcpu->arch.mmu->root_role.direct); >+ fault.gfn = gpa_to_gfn(fault.addr) & ~kvm_gfn_shared_mask(vcpu->kvm); Could you clarify when shared bits need to be masked out or kept? shared bits are masked out here but kept in the hunk right above and .. >+++ b/arch/x86/kvm/mmu/tdp_iter.h >@@ -91,7 +91,7 @@ struct tdp_iter { > tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL]; > /* A pointer to the current SPTE */ > tdp_ptep_t sptep; >- /* The lowest GFN mapped by the current SPTE */ >+ /* The lowest GFN (shared bits included) mapped by the current SPTE */ > gfn_t gfn; .. in @gfn of tdp_iter. > /* The level of the root page given to the iterator */ > int root_level; > >-hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) >+static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu, Maybe fold it into its sole caller. >+ bool private) > { > union kvm_mmu_page_role role = vcpu->arch.mmu->root_role; > struct kvm *kvm = vcpu->kvm; >@@ -221,6 +225,8 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) > * Check for an existing root before allocating a new one. Note, the > * role check prevents consuming an invalid root. > */ >+ if (private) >+ kvm_mmu_page_role_set_private(&role); > for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) { > if (root->role.word == role.word && > kvm_tdp_mmu_get_root(root)) >@@ -244,12 +250,17 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) > spin_unlock(&kvm->arch.tdp_mmu_pages_lock); > > out: >- return __pa(root->spt); >+ return root; >+} >+ >+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu, bool private) >+{ >+ return __pa(kvm_tdp_mmu_get_vcpu_root(vcpu, private)->spt); > } > > static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, >- u64 old_spte, u64 new_spte, int level, >- bool shared); >+ u64 old_spte, u64 new_spte, >+ union kvm_mmu_page_role role, bool shared); > > static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) > { >@@ -376,12 +387,78 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) > REMOVED_SPTE, level); > } > handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn, >- old_spte, REMOVED_SPTE, level, shared); >+ old_spte, REMOVED_SPTE, sp->role, >+ shared); >+ } >+ >+ if (is_private_sp(sp) && >+ WARN_ON(static_call(kvm_x86_free_private_spt)(kvm, sp->gfn, sp->role.level, WARN_ON_ONCE()? >+ kvm_mmu_private_spt(sp)))) { >+ /* >+ * Failed to unlink Secure EPT page and there is nothing to do >+ * further. Intentionally leak the page to prevent the kernel >+ * from accessing the encrypted page. >+ */ >+ kvm_mmu_init_private_spt(sp, NULL); > } > > call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); > } > > rcu_read_lock(); > > for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) { >@@ -960,10 +1158,26 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, > > if (unlikely(!fault->slot)) > new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); >- else >- wrprot = make_spte(vcpu, sp, fault->slot, ACC_ALL, iter->gfn, >- fault->pfn, iter->old_spte, fault->prefetch, true, >- fault->map_writable, &new_spte); >+ else { >+ unsigned long pte_access = ACC_ALL; >+ gfn_t gfn = iter->gfn; >+ >+ if (kvm_gfn_shared_mask(vcpu->kvm)) { >+ if (fault->is_private) >+ gfn |= kvm_gfn_shared_mask(vcpu->kvm); this is an open-coded kvm_gfn_to_shared(). I don't get why a spte is installed for a shared gfn when fault->is_private is true. could you elaborate? >+ else >+ /* >+ * TDX shared GPAs are no executable, enforce >+ * this for the SDV. >+ */ what do you mean by the SDV? >+ pte_access &= ~ACC_EXEC_MASK; >+ } >+ >+ wrprot = make_spte(vcpu, sp, fault->slot, pte_access, gfn, >+ fault->pfn, iter->old_spte, >+ fault->prefetch, true, fault->map_writable, >+ &new_spte); >+ } > > if (new_spte == iter->old_spte) > ret = RET_PF_SPURIOUS; >@@ -1041,6 +1255,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > struct kvm *kvm = vcpu->kvm; > struct tdp_iter iter; > struct kvm_mmu_page *sp; >+ gfn_t raw_gfn; >+ bool is_private = fault->is_private && kvm_gfn_shared_mask(kvm); > int ret = RET_PF_RETRY; > > kvm_mmu_hugepage_adjust(vcpu, fault); >@@ -1049,7 +1265,17 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > > rcu_read_lock(); > >- tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) { >+ raw_gfn = gpa_to_gfn(fault->addr); >+ >+ if (is_error_noslot_pfn(fault->pfn) || >+ !kvm_pfn_to_refcounted_page(fault->pfn)) { >+ if (is_private) { >+ rcu_read_unlock(); >+ return -EFAULT; This needs a comment. why this check is necessary? does this imply some kernel bugs?