Re: [PATCH 1/3] KVM: Don't enable MWAIT in guest by default

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I don't understand the concern regarding CPUID5_ECX_INTERRUPT_BREAK.
Even if the CPU has this feature, can't the guest bypass it by
disabling interrupts and invoking MWAIT with bit 0 of ECX clear?

On Sat, Nov 25, 2017 at 5:09 AM, Jan H. Schönherr <jschoenh@xxxxxxxxx> wrote:
> Allowing a guest to execute MWAIT without interception enables the guest
> to put a (physical) CPU into a power-saving state from which it takes
> longer to return than the host may desire.
>
> Don't give a guest that power over a host by default. (Especially
> since nothing prevents a guest from using MWAIT even when it is not
> advertised via CPUID.)
>
> This restores the behavior from before Linux 4.12 commit 668fffa3f838
> ("kvm: better MWAIT emulation for guests") but keeps the option to
> enable MWAIT in guest for individual VMs.
>
> Suggested-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
> Signed-off-by: Jan H. Schönherr <jschoenh@xxxxxxxxx>
> ---
> Note: AMD code paths are only compile tested
> ---
>  Documentation/virtual/kvm/api.txt | 20 ++++++++++--------
>  arch/x86/include/asm/kvm_host.h   |  2 ++
>  arch/x86/kvm/svm.c                |  2 +-
>  arch/x86/kvm/vmx.c                |  9 ++++----
>  arch/x86/kvm/x86.c                | 44 ++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.h                | 35 ++-----------------------------
>  6 files changed, 64 insertions(+), 48 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index f670e4b..0ee812c 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -4163,6 +4163,17 @@ enables QEMU to build error log and branch to guest kernel registered
>  machine check handling routine. Without this capability KVM will
>  branch to guests' 0x200 interrupt vector.
>
> +7.13 KVM_CAP_X86_GUEST_MWAIT
> +
> +Architectures: x86
> +Parameters: none
> +Returns: 0 on success
> +
> +This capability indicates that a guest using memory monitoring instructions
> +(MWAIT/MWAITX) to stop a virtual CPU will not cause a VM exit. As such, time
> +spent while a virtual CPU is halted in this way will then be accounted for as
> +guest running time on the host (as opposed to e.g. HLT).
> +
>  8. Other capabilities.
>  ----------------------
>
> @@ -4275,15 +4286,6 @@ reserved.
>      Both registers and addresses are 64-bits wide.
>      It will be possible to run 64-bit or 32-bit guest code.
>
> -8.8 KVM_CAP_X86_GUEST_MWAIT
> -
> -Architectures: x86
> -
> -This capability indicates that guest using memory monotoring instructions
> -(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such time
> -spent while virtual CPU is halted in this way will then be accounted for as
> -guest running time on the host (as opposed to e.g. HLT).
> -
>  8.9 KVM_CAP_ARM_USER_IRQ
>
>  Architectures: arm, arm64
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b97726e..f7bcfaa 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -781,6 +781,8 @@ struct kvm_arch {
>
>         gpa_t wall_clock;
>
> +       bool mwait_in_guest;
> +
>         bool ept_identity_pagetable_done;
>         gpa_t ept_identity_map_addr;
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 1f3e7f2..ef1b320 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1253,7 +1253,7 @@ static void init_vmcb(struct vcpu_svm *svm)
>         set_intercept(svm, INTERCEPT_WBINVD);
>         set_intercept(svm, INTERCEPT_XSETBV);
>
> -       if (!kvm_mwait_in_guest()) {
> +       if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
>                 set_intercept(svm, INTERCEPT_MONITOR);
>                 set_intercept(svm, INTERCEPT_MWAIT);
>         }
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1eb7053..a067735 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3635,13 +3635,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>               CPU_BASED_USE_IO_BITMAPS |
>               CPU_BASED_MOV_DR_EXITING |
>               CPU_BASED_USE_TSC_OFFSETING |
> +             CPU_BASED_MWAIT_EXITING |
> +             CPU_BASED_MONITOR_EXITING |
>               CPU_BASED_INVLPG_EXITING |
>               CPU_BASED_RDPMC_EXITING;
>
> -       if (!kvm_mwait_in_guest())
> -               min |= CPU_BASED_MWAIT_EXITING |
> -                       CPU_BASED_MONITOR_EXITING;
> -
>         opt = CPU_BASED_TPR_SHADOW |
>               CPU_BASED_USE_MSR_BITMAPS |
>               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
> @@ -5297,6 +5295,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
>                 exec_control |= CPU_BASED_CR3_STORE_EXITING |
>                                 CPU_BASED_CR3_LOAD_EXITING  |
>                                 CPU_BASED_INVLPG_EXITING;
> +       if (kvm_mwait_in_guest(vmx->vcpu.kvm))
> +               exec_control &= ~(CPU_BASED_MWAIT_EXITING |
> +                                 CPU_BASED_MONITOR_EXITING);
>         return exec_control;
>  }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 985a305..fe6627a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -67,6 +67,7 @@
>  #include <asm/pvclock.h>
>  #include <asm/div64.h>
>  #include <asm/irq_remapping.h>
> +#include <asm/mwait.h>
>
>  #define CREATE_TRACE_POINTS
>  #include "trace.h"
> @@ -2672,6 +2673,40 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
>         return r;
>  }
>
> +static bool kvm_mwait_in_guest_possible(void)
> +{
> +       unsigned int eax, ebx, ecx, edx;
> +
> +       if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
> +               return false;
> +
> +       switch (boot_cpu_data.x86_vendor) {
> +       case X86_VENDOR_AMD:
> +               /* All AMD CPUs have a working MWAIT implementation */
> +               return true;
> +       case X86_VENDOR_INTEL:
> +               /* Handle Intel below */
> +               break;
> +       default:
> +               return false;
> +       }
> +
> +       /*
> +        * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
> +        * they would allow guest to stop the CPU completely by disabling
> +        * interrupts then invoking MWAIT.
> +        */
> +       if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
> +               return false;
> +
> +       cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
> +
> +       if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
> +               return false;
> +
> +       return true;
> +}
> +
>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  {
>         int r;
> @@ -2726,7 +2761,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>                 r = KVM_CLOCK_TSC_STABLE;
>                 break;
>         case KVM_CAP_X86_GUEST_MWAIT:
> -               r = kvm_mwait_in_guest();
> +               r = kvm_mwait_in_guest_possible();
>                 break;
>         case KVM_CAP_X86_SMM:
>                 /* SMBASE is usually relocated above 1M on modern chipsets,
> @@ -4026,6 +4061,13 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>
>                 r = 0;
>                 break;
> +       case KVM_CAP_X86_GUEST_MWAIT:
> +               r = -EINVAL;
> +               if (kvm_mwait_in_guest_possible()) {
> +                       kvm->arch.mwait_in_guest = true;
> +                       r = 0;
> +               }
> +               break;
>         default:
>                 r = -EINVAL;
>                 break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index d0b95b7..ed8e150 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -2,8 +2,6 @@
>  #ifndef ARCH_X86_KVM_X86_H
>  #define ARCH_X86_KVM_X86_H
>
> -#include <asm/processor.h>
> -#include <asm/mwait.h>
>  #include <linux/kvm_host.h>
>  #include <asm/pvclock.h>
>  #include "kvm_cache_regs.h"
> @@ -263,38 +261,9 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
>             __rem;                                              \
>          })
>
> -static inline bool kvm_mwait_in_guest(void)
> +static inline bool kvm_mwait_in_guest(struct kvm *kvm)
>  {
> -       unsigned int eax, ebx, ecx, edx;
> -
> -       if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
> -               return false;
> -
> -       switch (boot_cpu_data.x86_vendor) {
> -       case X86_VENDOR_AMD:
> -               /* All AMD CPUs have a working MWAIT implementation */
> -               return true;
> -       case X86_VENDOR_INTEL:
> -               /* Handle Intel below */
> -               break;
> -       default:
> -               return false;
> -       }
> -
> -       /*
> -        * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
> -        * they would allow guest to stop the CPU completely by disabling
> -        * interrupts then invoking MWAIT.
> -        */
> -       if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
> -               return false;
> -
> -       cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
> -
> -       if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
> -               return false;
> -
> -       return true;
> +       return kvm->arch.mwait_in_guest;
>  }
>
>  #endif
> --
> 2.3.1.dirty
>




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux