From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> TDX has its own mechanism to control the maximum number of vCPUs that the TDX guest can use. When creating a TDX guest, the maximum number of vCPUs of the guest needs to be passed to the TDX module as part of the measurement of the guest. Depending on TDX module's version, it may also report the maximum vCPUs it can support for all TDX guests. Because the maximum number of vCPUs is part of the measurement, thus part of attestation, it's better to allow the userspace to be able to configure it. E.g. the users may want to precisely control the maximum number of vCPUs their precious VMs can use. The actual control itself must be done via the TDH.MNG.INIT SEAMCALL, where the number of maximum cpus is part of the input to the TDX module, but KVM needs to support the "per-VM maximum number of vCPUs" and reflect that in the KVM_CAP_MAX_VCPUS. Currently, the KVM x86 always reports KVM_MAX_VCPUS for all VMs but doesn't allow to enable KVM_CAP_MAX_VCPUS to configure the number of maximum vCPUs on VM-basis. Add "per-VM maximum number of vCPUs" to KVM x86/TDX to accommodate TDX's needs. Specifically, use KVM's existing KVM_ENABLE_CAP IOCTL() to allow the userspace to configure the maximum vCPUs by making KVM x86 support enabling the KVM_CAP_MAX_VCPUS cap on VM-basis. For that, add a new 'kvm_x86_ops::vm_enable_cap()' callback and call it from kvm_vm_ioctl_enable_cap() as a placeholder to handle the KVM_CAP_MAX_VCPUS for TDX guests (and other KVM_CAP_xx for TDX and/or other VMs if needed in the future). Implement the callback for TDX guest to check whether the maximum vCPUs passed from usrspace can be supported by TDX, and if it can, override the 'struct kvm::max_vcpus'. Leave VMX guests and all AMD guests unsupported to avoid any side-effect for those VMs. Accordingly, in the KVM_CHECK_EXTENSION IOCTL(), change to return the 'struct kvm::max_vcpus' for a given VM for the KVM_CAP_MAX_VCPUS. Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx> --- uAPI breakout v1: - Change to use exported 'struct tdx_sysinfo' pointer. - Remove the code to read 'max_vcpus_per_td' since it is now done in TDX host code. - Drop max_vcpu ops to use kvm.max_vcpus - Remove TDX_MAX_VCPUS (Kai) - Use type cast (u16) instead of calling memcpy() when reading the 'max_vcpus_per_td' (Kai) - Improve change log and change patch title from "KVM: TDX: Make KVM_CAP_MAX_VCPUS backend specific" (Kai) --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/main.c | 10 ++++++++++ arch/x86/kvm/vmx/tdx.c | 29 +++++++++++++++++++++++++++++ arch/x86/kvm/vmx/x86_ops.h | 5 +++++ arch/x86/kvm/x86.c | 4 ++++ 6 files changed, 50 insertions(+) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 538f50eee86d..bd7434fe5d37 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -19,6 +19,7 @@ KVM_X86_OP(hardware_disable) KVM_X86_OP(hardware_unsetup) KVM_X86_OP(has_emulated_msr) KVM_X86_OP(vcpu_after_set_cpuid) +KVM_X86_OP_OPTIONAL(vm_enable_cap) KVM_X86_OP(vm_init) KVM_X86_OP_OPTIONAL(vm_destroy) KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c754183e0932..9d15f810f046 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1648,6 +1648,7 @@ struct kvm_x86_ops { void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); unsigned int vm_size; + int (*vm_enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap); int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 59f4d2d42620..cd53091ddaab 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -7,6 +7,7 @@ #include "pmu.h" #include "posted_intr.h" #include "tdx.h" +#include "tdx_arch.h" static __init int vt_hardware_setup(void) { @@ -41,6 +42,14 @@ static __init int vt_hardware_setup(void) return 0; } +static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + if (is_td(kvm)) + return tdx_vm_enable_cap(kvm, cap); + + return -EINVAL; +} + static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp) { if (!is_td(kvm)) @@ -72,6 +81,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .has_emulated_msr = vmx_has_emulated_msr, .vm_size = sizeof(struct kvm_vmx), + .vm_enable_cap = vt_vm_enable_cap, .vm_init = vmx_vm_init, .vm_destroy = vmx_vm_destroy, diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index f9faec217ea9..84cd9b4f90b5 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -44,6 +44,35 @@ struct kvm_tdx_caps { static struct kvm_tdx_caps *kvm_tdx_caps; +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r; + + switch (cap->cap) { + case KVM_CAP_MAX_VCPUS: { + if (cap->flags || cap->args[0] == 0) + return -EINVAL; + if (cap->args[0] > KVM_MAX_VCPUS || + cap->args[0] > tdx_sysinfo->td_conf.max_vcpus_per_td) + return -E2BIG; + + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + r = -EBUSY; + else { + kvm->max_vcpus = cap->args[0]; + r = 0; + } + mutex_unlock(&kvm->lock); + break; + } + default: + r = -EINVAL; + break; + } + return r; +} + static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd) { const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf; diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index c69ca640abe6..c1bdf7d8fee3 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -119,8 +119,13 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu); void vmx_setup_mce(struct kvm_vcpu *vcpu); #ifdef CONFIG_INTEL_TDX_HOST +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int tdx_vm_ioctl(struct kvm *kvm, void __user *argp); #else +static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + return -EINVAL; +}; static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; } #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7914ea50fd04..751b3841c48f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4754,6 +4754,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; + if (kvm) + r = kvm->max_vcpus; break; case KVM_CAP_MAX_VCPU_ID: r = KVM_MAX_VCPU_IDS; @@ -6772,6 +6774,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } default: r = -EINVAL; + if (kvm_x86_ops.vm_enable_cap) + r = static_call(kvm_x86_vm_enable_cap)(kvm, cap); break; } return r; -- 2.34.1