On 5/29/2023 12:19 PM, isaku.yamahata@xxxxxxxxx wrote:
From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
The TDX support will need the "suppress #VE" bit (bit 63) set as the
initial value for SPTE. To reduce code change size, introduce a new macro
SHADOW_NONPRESENT_VALUE for the initial value for the shadow page table
entry (SPTE) and replace hard-coded value 0 for it. Initialize shadow page
tables with their value.
The plan is to unconditionally set the "suppress #VE" bit for both AMD and
Intel as: 1) AMD hardware uses the bit 63 as NX for present SPTE and
ignored for non-present SPTE; 2) for conventional VMX guests, KVM never
enables the "EPT-violation #VE" in VMCS control and "suppress #VE" bit is
ignored by hardware.
Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++++-----
arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
arch/x86/kvm/mmu/spte.h | 2 ++
arch/x86/kvm/mmu/tdp_mmu.c | 14 +++++++-------
4 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index dc2b9a2f717c..1b6fd4434e96 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -576,9 +576,9 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
if (!is_shadow_present_pte(old_spte) ||
!spte_has_volatile_bits(old_spte))
- __update_clear_spte_fast(sptep, 0ull);
+ __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
else
- old_spte = __update_clear_spte_slow(sptep, 0ull);
+ old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE);
if (!is_shadow_present_pte(old_spte))
return old_spte;
@@ -612,7 +612,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
*/
static void mmu_spte_clear_no_track(u64 *sptep)
{
- __update_clear_spte_fast(sptep, 0ull);
+ __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
}
static u64 mmu_spte_get_lockless(u64 *sptep)
@@ -1969,7 +1969,8 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i)
{
- if (!sp->spt[i])
+ /* sp->spt[i] has initial value of shadow page table allocation */
+ if (sp->spt[i] != SHADOW_NONPRESENT_VALUE)
this condition should be "sp->spt[i] == SHADOW_NONPRESENT_VALUE"?
I found this cause the bug of "guest launch failure with EPT disable".
return 0;
return vcpu->arch.mmu->sync_spte(vcpu, sp, i);
@@ -6120,7 +6121,16 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
- vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
+ /*
+ * When X86_64, initial SEPT entries are initialized with
+ * SHADOW_NONPRESENT_VALUE. Otherwise zeroed. See
+ * mmu_memory_cache_alloc_obj().
+ */
+ if (IS_ENABLED(CONFIG_X86_64))
+ vcpu->arch.mmu_shadow_page_cache.init_value =
+ SHADOW_NONPRESENT_VALUE;
+ if (!vcpu->arch.mmu_shadow_page_cache.init_value)
+ vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
vcpu->arch.mmu = &vcpu->arch.root_mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 0662e0278e70..ef8124bd2f11 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -892,7 +892,7 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
gpa_t pte_gpa;
gfn_t gfn;
- if (WARN_ON_ONCE(!sp->spt[i]))
+ if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE))
return 0;
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 1279db2eab44..a99eb7d4ae5d 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -148,6 +148,8 @@ static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11);
#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
+#define SHADOW_NONPRESENT_VALUE 0ULL
+
extern u64 __read_mostly shadow_host_writable_mask;
extern u64 __read_mostly shadow_mmu_writable_mask;
extern u64 __read_mostly shadow_nx_mask;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 3000ef6d79ea..ddd995885dd3 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -627,7 +627,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* here since the SPTE is going from non-present to non-present. Use
* the raw write helper to avoid an unnecessary check on volatile bits.
*/
- __kvm_tdp_mmu_write_spte(iter->sptep, 0);
+ __kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);
return 0;
}
@@ -764,8 +764,8 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
continue;
if (!shared)
- tdp_mmu_iter_set_spte(kvm, &iter, 0);
- else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
+ tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
+ else if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE))
goto retry;
}
}
@@ -821,8 +821,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
return false;
- tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
- sp->gfn, sp->role.level + 1);
+ tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte,
+ SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1);
return true;
}
@@ -856,7 +856,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
!is_last_spte(iter.old_spte, iter.level))
continue;
- tdp_mmu_iter_set_spte(kvm, &iter, 0);
+ tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
flush = true;
}
@@ -1250,7 +1250,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
* invariant that the PFN of a present * leaf SPTE can never change.
* See handle_changed_spte().
*/
- tdp_mmu_iter_set_spte(kvm, iter, 0);
+ tdp_mmu_iter_set_spte(kvm, iter, SHADOW_NONPRESENT_VALUE);
if (!pte_write(range->pte)) {
new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,