From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> Unlike normal VMs, confidential VMs (Intel TDX and AMD SEV-ES) don't allow some operations (e.g., memory read/write, register state acces, etc). Introduce vm_type to track the type of the VM. Further, different policy can be made based on vm_type. Define KVM_X86_NORMAL_VM for normal VM as default and define KVM_X86_TDX_VM for Intel TDX VM. Add a capability KVM_CAP_VM_TYPES to effectively allow userspace to query what VM types are supported by KVM. Co-developed-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx> Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> --- Documentation/virt/kvm/api.rst | 15 +++++++++++++++ arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/uapi/asm/kvm.h | 3 +++ arch/x86/kvm/svm/svm.c | 6 ++++++ arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 9 ++++++++- include/uapi/linux/kvm.h | 1 + tools/arch/x86/include/uapi/asm/kvm.h | 3 +++ tools/include/uapi/linux/kvm.h | 1 + 10 files changed, 46 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 3b093d6dbe22..79b64cdb603b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -147,15 +147,30 @@ described as 'basic' will be available. The new VM has no virtual cpus and no memory. You probably want to use 0 as machine type. +X86: +^^^^ + +Supported vm type can be queried from KVM_CAP_VM_TYPES, which returns the +bitmap of supported vm types. The 1-setting of bit @n means vm type with +value @n is supported. + +S390: +^^^^^ + In order to create user controlled virtual machines on S390, check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as privileged user (CAP_SYS_ADMIN). +MIPS: +^^^^^ + To use hardware assisted virtualization on MIPS (VZ ASE) rather than the default trap & emulate implementation (which changes the virtual memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the flag KVM_VM_MIPS_VZ. +ARM64: +^^^^^^ On arm64, the physical address size for a VM (IPA Size limit) is limited to 40bits by default. The limit can be configured if the host supports the diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index cefe1d81e2e8..4aa0a1c10b84 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -18,6 +18,7 @@ KVM_X86_OP_NULL(hardware_unsetup) KVM_X86_OP_NULL(cpu_has_accelerated_tpr) KVM_X86_OP(has_emulated_msr) KVM_X86_OP(vcpu_after_set_cpuid) +KVM_X86_OP(is_vm_type_supported) KVM_X86_OP(vm_init) KVM_X86_OP_NULL(vm_destroy) KVM_X86_OP(vcpu_create) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2acf37cc1991..a5e27111b198 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1036,6 +1036,7 @@ struct kvm_x86_msr_filter { #define APICV_INHIBIT_REASON_X2APIC 5 struct kvm_arch { + unsigned long vm_type; unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; @@ -1310,6 +1311,7 @@ struct kvm_x86_ops { bool (*has_emulated_msr)(struct kvm *kvm, u32 index); void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); + bool (*is_vm_type_supported)(unsigned long vm_type); unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5a776a08f78c..102b71229f87 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -508,4 +508,7 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ +#define KVM_X86_NORMAL_VM 0 +#define KVM_X86_TDX_VM 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b36ca4e476c2..1c7a7e1f56f8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4567,6 +4567,11 @@ static void svm_vm_destroy(struct kvm *kvm) sev_vm_destroy(kvm); } +static bool svm_is_vm_type_supported(unsigned long type) +{ + return type == KVM_X86_NORMAL_VM; +} + static int svm_vm_init(struct kvm *kvm) { if (!pause_filter_count || !pause_filter_thresh) @@ -4594,6 +4599,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vcpu_free = svm_free_vcpu, .vcpu_reset = svm_vcpu_reset, + .is_vm_type_supported = svm_is_vm_type_supported, .vm_size = sizeof(struct kvm_svm), .vm_init = svm_vm_init, .vm_destroy = svm_vm_destroy, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 76861b66bbcf..41513ae18f22 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6925,6 +6925,11 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) return err; } +static bool vmx_is_vm_type_supported(unsigned long type) +{ + return type == KVM_X86_NORMAL_VM; +} + #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" #define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" @@ -7578,6 +7583,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .cpu_has_accelerated_tpr = report_flexpriority, .has_emulated_msr = vmx_has_emulated_msr, + .is_vm_type_supported = vmx_is_vm_type_supported, .vm_size = sizeof(struct kvm_vmx), .vm_init = vmx_vm_init, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c1c4e2b05a63..c080c68e4386 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4183,6 +4183,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = 0; break; + case KVM_CAP_VM_TYPES: + r = BIT(KVM_X86_NORMAL_VM); + if (static_call(kvm_x86_is_vm_type_supported)(KVM_X86_TDX_VM)) + r |= BIT(KVM_X86_TDX_VM); + break; default: break; } @@ -11231,9 +11236,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) int ret; unsigned long flags; - if (type) + if (!static_call(kvm_x86_is_vm_type_supported)(type)) return -EINVAL; + kvm->arch.vm_type = type; + ret = kvm_page_track_init(kvm); if (ret) return ret; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 78f0719cc2a3..91f75704ef6a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_TYPES 206 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2ef1f6513c68..69851dfd5085 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -504,4 +504,7 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +#define KVM_X86_NORMAL_VM 0 +#define KVM_X86_TDX_VM 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index a067410ebea5..e7616e64fb4e 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1112,6 +1112,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_TYPES 206 #ifdef KVM_CAP_IRQ_ROUTING -- 2.27.0