Re: [RFC PATCH v5 045/104] KVM: x86/tdp_mmu: make REMOVED_SPTE include shadow_initial value

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 3/4/22 20:49, isaku.yamahata@xxxxxxxxx wrote:
From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

TDP MMU uses REMOVED_SPTE = 0x5a0ULL as special constant to indicate the
intermediate value to indicate one thread is operating on it and the value
should be semi-arbitrary value.  For TDX (more correctly to use #VE), the
value should include suppress #VE value which is shadow_init_value.

Define SHADOW_REMOVED_SPTE as shadow_init_value | REMOVED_SPTE, and replace
REMOVED_SPTE with SHADOW_REMOVED_SPTE to use suppress #VE bit properly for
TDX.

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
---
  arch/x86/kvm/mmu/spte.h    | 14 ++++++++++++--
  arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++++++++++++++-------
  2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index bde843bce878..e88f796724b4 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -194,7 +194,9 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
   * If a thread running without exclusive control of the MMU lock must perform a
   * multi-part operation on an SPTE, it can set the SPTE to REMOVED_SPTE as a
   * non-present intermediate value. Other threads which encounter this value
- * should not modify the SPTE.
+ * should not modify the SPTE.  When TDX is enabled, shadow_init_value, which
+ * is "suppress #VE" bit set, is also set to removed SPTE, because TDX module
+ * always enables "EPT violation #VE".
   *
   * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
   * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
@@ -207,9 +209,17 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
  /* Removed SPTEs must not be misconstrued as shadow present PTEs. */
  static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
+/*
+ * See above comment around REMOVED_SPTE.  SHADOW_REMOVED_SPTE is the actual
+ * intermediate value set to the removed SPET.  When TDX is enabled, it sets
+ * the "suppress #VE" bit, otherwise it's REMOVED_SPTE.
+ */
+extern u64 __read_mostly shadow_init_value;
+#define SHADOW_REMOVED_SPTE	(shadow_init_value | REMOVED_SPTE)

Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call this simply REMOVED_SPTE. This also makes the patch smaller.

Paolo

  }
/*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index ebd0a02620e8..b6ec2f112c26 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -338,7 +338,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
  			 * value to the removed SPTE value.
  			 */
  			for (;;) {
-				old_child_spte = xchg(sptep, REMOVED_SPTE);
+				old_child_spte = xchg(sptep, SHADOW_REMOVED_SPTE);
  				if (!is_removed_spte(old_child_spte))
  					break;
  				cpu_relax();
@@ -365,10 +365,10 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
  			 * the two branches consistent and simplifies
  			 * the function.
  			 */
-			WRITE_ONCE(*sptep, REMOVED_SPTE);
+			WRITE_ONCE(*sptep, SHADOW_REMOVED_SPTE);
  		}
  		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
-				    old_child_spte, REMOVED_SPTE, level,
+				    old_child_spte, SHADOW_REMOVED_SPTE, level,
  				    shared);
  	}
@@ -537,7 +537,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
  	 * immediately installing a present entry in its place
  	 * before the TLBs are flushed.
  	 */
-	if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
+	if (!tdp_mmu_set_spte_atomic(kvm, iter, SHADOW_REMOVED_SPTE))
  		return false;
kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
@@ -550,8 +550,16 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
  	 * special removed SPTE value. No bookkeeping is needed
  	 * here since the SPTE is going from non-present
  	 * to non-present.
+	 *
+	 * Set non-present value to shadow_init_value, rather than 0.
+	 * It is because when TDX is enabled, TDX module always
+	 * enables "EPT-violation #VE", so KVM needs to set
+	 * "suppress #VE" bit in EPT table entries, in order to get
+	 * real EPT violation, rather than TDVMCALL.  KVM sets
+	 * shadow_init_value (which sets "suppress #VE" bit) so it
+	 * can be set when EPT table entries are zapped.
  	 */
-	WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
+	WRITE_ONCE(*rcu_dereference(iter->sptep), shadow_init_value);
return true;
  }
@@ -748,7 +756,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  			continue;
if (!shared) {
-			tdp_mmu_set_spte(kvm, &iter, 0);
+			/* see comments in tdp_mmu_zap_spte_atomic() */
+			tdp_mmu_set_spte(kvm, &iter, shadow_init_value);
  			flush = true;
  		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
  			/*
@@ -1135,7 +1144,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
  	 * invariant that the PFN of a present * leaf SPTE can never change.
  	 * See __handle_changed_spte().
  	 */
-	tdp_mmu_set_spte(kvm, iter, 0);
+	tdp_mmu_set_spte(kvm, iter, shadow_init_value);
if (!pte_write(range->pte)) {
  		new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux