From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

Add a new VM-scoped KVM_MEMORY_ENCRYPT_OP IOCTL subcommand,
KVM_TDX_FINALIZE_VM, to perform TD Measurement Finalization.

Documentation for the API is added in another patch:
"Documentation/virt/kvm: Document on Trust Domain Extensions(TDX)"

For the purpose of attestation, a measurement must be made of the TDX VM
initial state. This is referred to as TD Measurement Finalization, and
uses SEAMCALL TDH.MR.FINALIZE, after which:
1. The VMM adding TD private pages with arbitrary content is no longer
   allowed
2. The TDX VM is runnable

Co-developed-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx>
Message-ID: <20240904030751.117579-21-rick.p.edgecombe@xxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
 arch/x86/include/uapi/asm/kvm.h |  1 +
 arch/x86/kvm/vmx/tdx.c          | 78 +++++++++++++++++++++++++++++----
 arch/x86/kvm/vmx/tdx.h          |  3 ++
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index ee1b517351a3..89cc7a18ef45 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -933,6 +933,7 @@ enum kvm_tdx_cmd_id {
 	KVM_TDX_INIT_VM,
 	KVM_TDX_INIT_VCPU,
 	KVM_TDX_INIT_MEM_REGION,
+	KVM_TDX_FINALIZE_VM,
 	KVM_TDX_GET_CPUID,
 
 	KVM_TDX_CMD_NR_MAX,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 12f3433d062d..246f924ae6f6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -557,6 +557,29 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
 	return 0;
 }
 
+/*
+ * KVM_TDX_INIT_MEM_REGION calls kvm_gmem_populate() to map guest pages; the
+ * callback tdx_gmem_post_populate() then maps pages into private memory
+ * through the SEAMCALL TDH.MEM.PAGE.ADD().  The SEAMCALL also requires the
+ * private EPT structures for the page to have been built beforehand, which
+ * is done via kvm_tdp_map_page().  nr_premapped counts the number of pages
+ * that were added to the EPT structures but not yet added with
+ * TDH.MEM.PAGE.ADD().  The counter has to be zero on KVM_TDX_FINALIZE_VM,
+ * to ensure that there are no half-initialized shared EPT pages.
+ */
+static int tdx_mem_page_record_premap_cnt(struct kvm *kvm, gfn_t gfn,
+					  enum pg_level level, kvm_pfn_t pfn)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+	if (KVM_BUG_ON(kvm->arch.pre_fault_allowed, kvm))
+		return -EINVAL;
+
+	/* nr_premapped will be decreased when tdh_mem_page_add() is called. */
+	atomic64_inc(&kvm_tdx->nr_premapped);
+	return 0;
+}
+
 int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 			      enum pg_level level, kvm_pfn_t pfn)
 {
@@ -577,14 +600,15 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 	 */
 	get_page(page);
 
+	/*
+	 * Read 'pre_fault_allowed' before 'kvm_tdx->state'; see matching
+	 * barrier in tdx_td_finalize().
+	 */
+	smp_rmb();
 	if (likely(kvm_tdx->state == TD_STATE_RUNNABLE))
 		return tdx_mem_page_aug(kvm, gfn, level, page);
 
-	/*
-	 * TODO: KVM_TDX_INIT_MEM_REGION support to populate before finalize
-	 * comes here for the initial memory.
-	 */
-	return -EOPNOTSUPP;
+	return tdx_mem_page_record_premap_cnt(kvm, gfn, level, pfn);
 }
 
 static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
@@ -615,10 +639,12 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
 	if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE &&
 		     err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
 		/*
-		 * This page was mapped with KVM_MAP_MEMORY, but
-		 * KVM_TDX_INIT_MEM_REGION is not issued yet.
+		 * The page was mapped by KVM_TDX_INIT_MEM_REGION, but
+		 * tdh_mem_page_add() hasn't been called for it yet.
 		 */
-		if (!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) {
+		if ((!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) &&
+		    !KVM_BUG_ON(!atomic64_read(&kvm_tdx->nr_premapped), kvm)) {
+			atomic64_dec(&kvm_tdx->nr_premapped);
 			tdx_unpin(kvm, page);
 			return 0;
 		}
@@ -1365,6 +1391,36 @@ void tdx_flush_tlb_all(struct kvm_vcpu *vcpu)
 	ept_sync_global();
 }
 
+static int tdx_td_finalize(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+	guard(mutex)(&kvm->slots_lock);
+
+	if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
+		return -EINVAL;
+	/*
+	 * Pages are pending for KVM_TDX_INIT_MEM_REGION to issue
+	 * TDH.MEM.PAGE.ADD().
+	 */
+	if (atomic64_read(&kvm_tdx->nr_premapped))
+		return -EINVAL;
+
+	cmd->hw_error = tdh_mr_finalize(&kvm_tdx->td);
+	if (tdx_operand_busy(cmd->hw_error))
+		return -EBUSY;
+	if (KVM_BUG_ON(cmd->hw_error, kvm)) {
+		pr_tdx_error(TDH_MR_FINALIZE, cmd->hw_error);
+		return -EIO;
+	}
+
+	kvm_tdx->state = TD_STATE_RUNNABLE;
+	/* TD_STATE_RUNNABLE must be set before 'pre_fault_allowed' */
+	smp_wmb();
+	kvm->arch.pre_fault_allowed = true;
+	return 0;
+}
+
 int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_tdx_cmd tdx_cmd;
@@ -1389,6 +1445,9 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
 	case KVM_TDX_INIT_VM:
 		r = tdx_td_init(kvm, &tdx_cmd);
 		break;
+	case KVM_TDX_FINALIZE_VM:
+		r = tdx_td_finalize(kvm, &tdx_cmd);
+		break;
 	default:
 		r = -EINVAL;
 		goto out;
@@ -1656,6 +1715,9 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 		goto out;
 	}
 
+	if (!KVM_BUG_ON(!atomic64_read(&kvm_tdx->nr_premapped), kvm))
+		atomic64_dec(&kvm_tdx->nr_premapped);
+
 	if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
 		for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
 			err = tdh_mr_extend(&kvm_tdx->td, gpa + i, &entry,
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 77d693dfcb08..b71f8e9643e4 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -31,6 +31,9 @@ struct kvm_tdx {
 	u64 tsc_offset;
 
 	struct tdx_td td;
+
+	/* For KVM_TDX_INIT_MEM_REGION. */
+	atomic64_t nr_premapped;
 };
 
 /* TDX module vCPU states */
-- 
2.43.5
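
For context, userspace reaches the new subcommand through the existing
KVM_MEMORY_ENCRYPT_OP VM ioctl once all KVM_TDX_INIT_MEM_REGION calls have
completed. The snippet below is a minimal, hypothetical userspace sketch,
not part of this patch; the vm_fd argument, the helper name, and the error
handling are illustrative assumptions:

#include <err.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Hypothetical helper: ask KVM to run TDH.MR.FINALIZE on an already
 * initialized TDX VM.  'vm_fd' is an assumed, previously created VM fd.
 */
static void tdx_finalize_vm(int vm_fd)
{
	struct kvm_tdx_cmd cmd = {
		.id = KVM_TDX_FINALIZE_VM,
	};

	if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd))
		err(1, "KVM_TDX_FINALIZE_VM (hw_error=0x%llx)",
		    (unsigned long long)cmd.hw_error);

	/* The TD is now runnable; vCPUs can be entered with KVM_RUN. */
}

Once this succeeds, adding TD private pages with arbitrary content is no
longer allowed and pre-faulting becomes possible, matching the
pre_fault_allowed handling in tdx_td_finalize() above.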