This patch adds support for UMONITOR, UMWAIT and TPAUSE instructions in kvm, and by default dont't expose it to kvm and provide a capability to enable it. Co-developed-by: Jingqi Liu <jingqi.liu@xxxxxxxxx> Signed-off-by: Jingqi Liu <jingqi.liu@xxxxxxxxx> Signed-off-by: Tao Xu <tao3.xu@xxxxxxxxx> --- Documentation/virtual/kvm/api.txt | 12 ++++++++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 7 +++++++ arch/x86/kvm/x86.h | 5 +++++ include/uapi/linux/kvm.h | 1 + 8 files changed, 32 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index ba6c42c576dd..3d0196220486 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4997,6 +4997,18 @@ it hard or impossible to use it correctly. The availability of KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed. Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT. +7.19 KVM_CAP_ENABLE_USR_WAIT_PAUSE + +Architectures: x86 +Parameters: args[0] whether feature should be enabled or not + +With this capability enabled, a VM can use UMONITOR, UMWAIT and TPAUSE +instructions. If the instruction causes a delay, the amount of +time delayed is called here the physical delay. The physical delay is +first computed by determining the virtual delay (the time to delay +relative to the VM’s timestamp counter). Otherwise, UMONITOR, UMWAIT +and TPAUSE cause an invalid-opcode exception(#UD). + 8. Other capabilities. ---------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 450d69a1e6fa..0da87c2e1c4d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -882,6 +882,7 @@ struct kvm_arch { bool mwait_in_guest; bool hlt_in_guest; bool pause_in_guest; + bool enable_usr_wait_pause; unsigned long irq_sources_bitmap; s64 kvmclock_offset; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4e4133e86484..1c94b1009288 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -82,6 +82,7 @@ #define SECONDARY_EXEC_PT_USE_GPA 0x01000000 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 80a642a0143d..1cc001870a9d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1ac167614032..a65ee7ea47b4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2247,6 +2247,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | @@ -3880,6 +3881,9 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (kvm_pause_in_guest(vmx->vcpu.kvm)) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; + if (!kvm_enable_usr_wait_pause(vmx->vcpu.kvm) || + (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_RDTSC_EXITING)) + exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; if (!kvm_vcpu_apicv_active(vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 536b78c4af6e..38a89c878c5d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3141,6 +3141,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_x86_ops->get_nested_state ? kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; + case KVM_CAP_ENABLE_USR_WAIT_PAUSE: + r = boot_cpu_has(X86_FEATURE_WAITPKG); + break; default: break; } @@ -4622,6 +4625,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.exception_payload_enabled = cap->args[0]; r = 0; break; + case KVM_CAP_ENABLE_USR_WAIT_PAUSE: + kvm->arch.enable_usr_wait_pause = true; + r = 0; + break; default: r = -EINVAL; break; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a470ff0868c5..37685e6679f3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -333,6 +333,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm) return kvm->arch.pause_in_guest; } +static inline bool kvm_enable_usr_wait_pause(struct kvm *kvm) +{ + return kvm->arch.enable_usr_wait_pause; +} + DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2fe12b40d503..5a19a5984c57 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -993,6 +993,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SVE 170 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 +#define KVM_CAP_ENABLE_USR_WAIT_PAUSE 173 #ifdef KVM_CAP_IRQ_ROUTING -- 2.20.1