On 9/4/2024 11:07 AM, Rick Edgecombe wrote:
From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
[...]
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 6feb3ab96926..b8cd5a629a80 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -447,6 +447,177 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
}
+static void tdx_unpin(struct kvm *kvm, kvm_pfn_t pfn)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ put_page(page);
Nit: This can simply be

    put_page(pfn_to_page(pfn));
+}
+
+static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
+ enum pg_level level, kvm_pfn_t pfn)
+{
+ int tdx_level = pg_level_to_tdx_sept_level(level);
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+ hpa_t hpa = pfn_to_hpa(pfn);
+ gpa_t gpa = gfn_to_gpa(gfn);
+ u64 entry, level_state;
+ u64 err;
+
+ err = tdh_mem_page_aug(kvm_tdx, gpa, hpa, &entry, &level_state);
Nit:
The kernel usually prefers to handle error conditions and return early.
But in this case, every error path needs to unpin the page. Would it be
better to return on the success case first, so that tdx_unpin() only
needs to be called once?
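Something along these lines (untested sketch, just to illustrate the
reordering):

    err = tdh_mem_page_aug(kvm_tdx, gpa, hpa, &entry, &level_state);
    if (likely(!err))
            return 0;

    /* Every failure path below needs to unpin the page. */
    tdx_unpin(kvm, pfn);

    if (unlikely(err == TDX_ERROR_SEPT_BUSY))
            return -EAGAIN;

    if (unlikely(err == (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX)) &&
        tdx_get_sept_level(level_state) == tdx_level &&
        tdx_get_sept_state(level_state) == TDX_SEPT_PENDING &&
        is_last_spte(entry, level) &&
        spte_to_pfn(entry) == pfn &&
        (entry & VMX_EPT_SUPPRESS_VE_BIT))
            return -EAGAIN;

    /* Any other error is unexpected; treat it as a KVM bug. */
    KVM_BUG_ON(err, kvm);
    pr_tdx_error_2(TDH_MEM_PAGE_AUG, err, entry, level_state);
    return -EIO;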
+ if (unlikely(err == TDX_ERROR_SEPT_BUSY)) {
+ tdx_unpin(kvm, pfn);
+ return -EAGAIN;
+ }
+ if (unlikely(err == (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))) {
+ if (tdx_get_sept_level(level_state) == tdx_level &&
+ tdx_get_sept_state(level_state) == TDX_SEPT_PENDING &&
+ is_last_spte(entry, level) &&
+ spte_to_pfn(entry) == pfn &&
+ entry & VMX_EPT_SUPPRESS_VE_BIT) {
Can this condition actually be triggered?
For contention from multiple vCPUs, the winner will have frozen the SPTE,
so that shouldn't trigger it.
Could KVM somehow AUG the same page multiple times?
+ tdx_unpin(kvm, pfn);
+ return -EAGAIN;
+ }
+ }
+ if (KVM_BUG_ON(err, kvm)) {
+ pr_tdx_error_2(TDH_MEM_PAGE_AUG, err, entry, level_state);
+ tdx_unpin(kvm, pfn);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
+ enum pg_level level, kvm_pfn_t pfn)
+{
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+ /* TODO: handle large pages. */
+ if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
+ return -EINVAL;
+
+ /*
+ * Because guest_memfd doesn't support page migration with
+ * a_ops->migrate_folio (yet), no callback is triggered for KVM on page
+ * migration. Until guest_memfd supports page migration, prevent page
+ * migration.
+ * TODO: Once guest_memfd introduces callback on page migration,
+ * implement it and remove get_page/put_page().
+ */
+ get_page(pfn_to_page(pfn));
+
+ if (likely(is_td_finalized(kvm_tdx)))
+ return tdx_mem_page_aug(kvm, gfn, level, pfn);
+
+ /*
+ * TODO: KVM_MAP_MEMORY support to populate before finalize comes
+ * here for the initial memory.
+ */
+ return 0;
Would it be better to return an error here until that support is added?
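E.g. something like (untested; -EOPNOTSUPP is just one candidate for the
error code, and the page pinned above would need to be released):

    /* Populating memory before finalization is not supported yet. */
    tdx_unpin(kvm, pfn);
    return -EOPNOTSUPP;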
+}
+
[...]