In certain use-cases, we want to allocate guests fixed time slices where idle guest cycles leave the machine idling. There are many approaches to achieve this but the most direct is to simply avoid trapping the HLT instruction which lets the guest directly execute the instruction putting the processor to sleep. Introduce this as a module-level option for kvm-vmx.ko since if you do this for one guest, you probably want to do it for all. Signed-off-by: Anthony Liguori <aliguori@xxxxxxxxxx> --- v5 -> v4 - RIP is updated before setting activity state so emulation is unnecessary - yield_on_hlt is now __read_mostly v4 -> v3 - Clear HLT on all interrupt/exception injection and set EIP to next instruction v3 -> v2 - Clear HLT activity state on exception injection to fix issue with async PF v1 -> v2 - Rename parameter to yield_on_hlt - Remove __read_mostly diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 42d9590..9642c22 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -297,6 +297,12 @@ enum vmcs_field { #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +/* GUEST_ACTIVITY_STATE flags */ +#define GUEST_ACTIVITY_ACTIVE 0 +#define GUEST_ACTIVITY_HLT 1 +#define GUEST_ACTIVITY_SHUTDOWN 2 +#define GUEST_ACTIVITY_WAIT_SIPI 3 + /* * Exit Qualifications for MOV for Control Register Access */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index caa967e..e9dbecf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); static int __read_mostly vmm_exclusive = 1; module_param(vmm_exclusive, bool, S_IRUGO); +static int __read_mostly yield_on_hlt = 1; +module_param(yield_on_hlt, bool, S_IRUGO); + #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) #define KVM_GUEST_CR0_MASK \ @@ -1009,6 +1012,18 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); } +static void vmx_clear_hlt(struct kvm_vcpu *vcpu) +{ + /* Ensure that we clear the HLT state in the VMCS. We don't need to + * explicitly skip the instruction because if the HLT state is set, then + * the instruction is already executing and RIP has already been + * advanced. */ + if (!yield_on_hlt && + vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) { + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); + } +} + static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code, bool reinject) @@ -1035,6 +1050,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, intr_info |= INTR_TYPE_HARD_EXCEPTION; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); + vmx_clear_hlt(vcpu); } static bool vmx_rdtscp_supported(void) @@ -1419,7 +1435,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) &_pin_based_exec_control) < 0) return -EIO; - min = CPU_BASED_HLT_EXITING | + min = #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | @@ -1432,6 +1448,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING; + + if (yield_on_hlt) + min |= CPU_BASED_HLT_EXITING; + opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -2728,7 +2748,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_IDTR_BASE, 0); vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); - vmcs_write32(GUEST_ACTIVITY_STATE, 0); + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); @@ -2821,6 +2841,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) } else intr |= INTR_TYPE_EXT_INTR; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); + vmx_clear_hlt(vcpu); } static void vmx_inject_nmi(struct kvm_vcpu *vcpu) @@ -2848,6 +2869,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + vmx_clear_hlt(vcpu); } static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -- 1.7.0.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html