On 8/13/2024 6:48 AM, Rick Edgecombe wrote: > From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > > After the crypto-protection key has been configured, TDX requires a > VM-scope initialization as a step of creating the TDX guest. This > "per-VM" TDX initialization does the global configurations/features that > the TDX guest can support, such as guest's CPUIDs (emulated by the TDX > module), the maximum number of vcpus etc. > > This "per-VM" TDX initialization must be done before any "vcpu-scope" TDX > initialization. To match this better, require the KVM_TDX_INIT_VM IOCTL() > to be done before KVM creates any vcpus. > > Co-developed-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx> > Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx> > Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > Signed-off-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx> > --- > uAPI breakout v1: > - Drop TDX_TD_XFAM_CET and use XFEATURE_MASK_CET_{USER, KERNEL}. > - Update for the wrapper functions for SEAMCALLs. (Sean) > - Move gfn_shared_mask settings into this patch due to MMU section move > - Fix bisectability issues in headers (Kai) > - Updates from seamcall overhaul (Kai) > - Allow userspace configure xfam directly > - Check if user sets non-configurable bits in CPUIDs > - Rename error->hw_error > - Move code change to tdx_module_setup() to __tdx_bringup() due to > initializing is done in post hardware_setup() now and > tdx_module_setup() is removed. Remove the code to use API to read > global metadata but use exported 'struct tdx_sysinfo' pointer. > - Replace 'tdx_info->nr_tdcs_pages' with a wrapper > tdx_sysinfo_nr_tdcs_pages() because the 'struct tdx_sysinfo' doesn't > have nr_tdcs_pages directly. > - Replace tdx_info->max_vcpus_per_td with the new exported pointer in > tdx_vm_init(). > - Decrease the reserved space for struct kvm_tdx_init_vm (Kai) > - Use sizeof_field() for struct kvm_tdx_init_vm cpuids (Tony) > - No need to init init_vm, it gets copied over in tdx_td_init() (Chao) > - Use kmalloc() instead of () kzalloc for init_vm in tdx_td_init() (Chao) > - Add more line breaks to tdx_td_init() to make code easier to read (Tony) > - Clarify patch description (Kai) > > v19: > - Check NO_RBP_MOD of feature0 and set it > - Update the comment for PT and CET > > v18: > - remove the change of tools/arch/x86/include/uapi/asm/kvm.h > - typo in comment. sha348 => sha384 > - updated comment in setup_tdparams_xfam() > - fix setup_tdparams_xfam() to use init_vm instead of td_params > > v16: > - Removed AMX check as the KVM upstream supports AMX. > - Added CET flag to guest supported xss > --- > arch/x86/include/uapi/asm/kvm.h | 24 ++++ > arch/x86/kvm/cpuid.c | 7 + > arch/x86/kvm/cpuid.h | 2 + > arch/x86/kvm/vmx/tdx.c | 237 ++++++++++++++++++++++++++++++-- > arch/x86/kvm/vmx/tdx.h | 4 + > arch/x86/kvm/vmx/tdx_ops.h | 12 ++ > 6 files changed, 276 insertions(+), 10 deletions(-) > ... > +static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params, > + u64 *seamcall_err) > { > struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); > cpumask_var_t packages; > @@ -427,8 +547,9 @@ static int __tdx_td_init(struct kvm *kvm) > unsigned long tdr_pa = 0; > unsigned long va; > int ret, i; > - u64 err; > + u64 err, rcx; > > + *seamcall_err = 0; > ret = tdx_guest_keyid_alloc(); > if (ret < 0) > return ret; > @@ -543,10 +664,23 @@ static int __tdx_td_init(struct kvm *kvm) > } > } > > - /* > - * Note, TDH_MNG_INIT cannot be invoked here. TDH_MNG_INIT requires a dedicated > - * ioctl() to define the configure CPUID values for the TD. > - */ > + err = tdh_mng_init(kvm_tdx, __pa(td_params), &rcx); > + if ((err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_INVALID) { > + /* > + * Because a user gives operands, don't warn. > + * Return a hint to the user because it's sometimes hard for the > + * user to figure out which operand is invalid. SEAMCALL status > + * code includes which operand caused invalid operand error. > + */ > + *seamcall_err = err; I'm wondering if we could return or output more hint (i.e. the value of rcx) in the case of invalid operand. For example, if seamcall returns with INVALID_OPERAND_CPUID_CONFIG, rcx will contain the CPUID leaf/sub-leaf info. > + ret = -EINVAL; > + goto teardown; > + } else if (WARN_ON_ONCE(err)) { > + pr_tdx_error_1(TDH_MNG_INIT, err, rcx); > + ret = -EIO; > + goto teardown; > + } > + > return 0; > > /* > @@ -592,6 +726,86 @@ static int __tdx_td_init(struct kvm *kvm) > return ret; > } > > +static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd) > +{ > + struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); > + struct kvm_tdx_init_vm *init_vm; > + struct td_params *td_params = NULL; > + int ret; > + > + BUILD_BUG_ON(sizeof(*init_vm) != 256 + sizeof_field(struct kvm_tdx_init_vm, cpuid)); > + BUILD_BUG_ON(sizeof(struct td_params) != 1024); > + > + if (is_hkid_assigned(kvm_tdx)) > + return -EINVAL; > + > + if (cmd->flags) > + return -EINVAL; > + > + init_vm = kmalloc(sizeof(*init_vm) + > + sizeof(init_vm->cpuid.entries[0]) * KVM_MAX_CPUID_ENTRIES, > + GFP_KERNEL); > + if (!init_vm) > + return -ENOMEM; > + > + if (copy_from_user(init_vm, u64_to_user_ptr(cmd->data), sizeof(*init_vm))) { > + ret = -EFAULT; > + goto out; > + } > + > + if (init_vm->cpuid.nent > KVM_MAX_CPUID_ENTRIES) { > + ret = -E2BIG; > + goto out; > + } > + > + if (copy_from_user(init_vm->cpuid.entries, > + u64_to_user_ptr(cmd->data) + sizeof(*init_vm), > + flex_array_size(init_vm, cpuid.entries, init_vm->cpuid.nent))) { > + ret = -EFAULT; > + goto out; > + } > + > + if (memchr_inv(init_vm->reserved, 0, sizeof(init_vm->reserved))) { > + ret = -EINVAL; > + goto out; > + } > + > + if (init_vm->cpuid.padding) { > + ret = -EINVAL; > + goto out; > + } > + > + td_params = kzalloc(sizeof(struct td_params), GFP_KERNEL); > + if (!td_params) { > + ret = -ENOMEM; > + goto out; > + } > + > + ret = setup_tdparams(kvm, td_params, init_vm); > + if (ret) > + goto out; > + > + ret = __tdx_td_init(kvm, td_params, &cmd->hw_error); > + if (ret) > + goto out; > + > + kvm_tdx->tsc_offset = td_tdcs_exec_read64(kvm_tdx, TD_TDCS_EXEC_TSC_OFFSET); > + kvm_tdx->attributes = td_params->attributes; > + kvm_tdx->xfam = td_params->xfam; > + > + if (td_params->exec_controls & TDX_EXEC_CONTROL_MAX_GPAW) > + kvm->arch.gfn_direct_bits = gpa_to_gfn(BIT_ULL(51)); > + else > + kvm->arch.gfn_direct_bits = gpa_to_gfn(BIT_ULL(47)); > + > +out: > + /* kfree() accepts NULL. */ > + kfree(init_vm); > + kfree(td_params); > + > + return ret; > +} > + > int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) > { > struct kvm_tdx_cmd tdx_cmd; > @@ -613,6 +827,9 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) > case KVM_TDX_CAPABILITIES: > r = tdx_get_capabilities(&tdx_cmd); > break; > + case KVM_TDX_INIT_VM: > + r = tdx_td_init(kvm, &tdx_cmd); > + break; > default: > r = -EINVAL; > goto out; > diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h > index 268959d0f74f..8912cb6d5bc2 100644 > --- a/arch/x86/kvm/vmx/tdx.h > +++ b/arch/x86/kvm/vmx/tdx.h > @@ -16,7 +16,11 @@ struct kvm_tdx { > unsigned long tdr_pa; > unsigned long *tdcs_pa; > > + u64 attributes; > + u64 xfam; > int hkid; > + > + u64 tsc_offset; > }; > > struct vcpu_tdx { > diff --git a/arch/x86/kvm/vmx/tdx_ops.h b/arch/x86/kvm/vmx/tdx_ops.h > index 3f64c871a3f2..0363d8544f42 100644 > --- a/arch/x86/kvm/vmx/tdx_ops.h > +++ b/arch/x86/kvm/vmx/tdx_ops.h > @@ -399,4 +399,16 @@ static inline u64 tdh_vp_wr(struct vcpu_tdx *tdx, u64 field, u64 val, u64 mask) > return seamcall(TDH_VP_WR, &in); > } > > +static __always_inline u64 td_tdcs_exec_read64(struct kvm_tdx *kvm_tdx, u32 field) > +{ > + u64 err, data; > + > + err = tdh_mng_rd(kvm_tdx, TDCS_EXEC(field), &data); > + if (unlikely(err)) { > + pr_err("TDH_MNG_RD[EXEC.0x%x] failed: 0x%llx\n", field, err); > + return 0; > + } > + return data; > +} > + > #endif /* __KVM_X86_TDX_OPS_H */