Implement the VMLAUNCH and VMRESUME instructions, allowing a guest
hypervisor to run its own guests.

Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>
---
 arch/x86/kvm/vmx.c |  139 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 2 deletions(-)
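
A note for reviewers, not part of the patch: nested_vmx_failValid(), used
throughout the entry checks below, emulates the SDM's VMfailValid status
convention, in which the instruction sets ZF in RFLAGS and stores an error
number in the VM_INSTRUCTION_ERROR VMCS field, while VMfailInvalid (no
current VMCS) sets CF instead. The following standalone sketch shows how an
L1 hypervisor typically observes the status this patch emulates.
l1_vmlaunch() and the flag masks are hypothetical illustration, not KVM
code; the sketch assumes HOST_RIP points at a separate exit handler (so
falling through VMLAUNCH means the instruction failed) and a vmcs_read32()
helper that VMREADs a 32-bit VMCS field.

#define X86_EFLAGS_CF	(1UL << 0)	/* set on VMfailInvalid */
#define X86_EFLAGS_ZF	(1UL << 6)	/* set on VMfailValid   */

static int l1_vmlaunch(void)
{
	unsigned long rflags;

	/* On success we never fall through; the CPU resumes at HOST_RIP. */
	asm volatile("vmlaunch\n\t"
		     "pushfq\n\t"
		     "popq %0"
		     : "=r" (rflags) : : "cc", "memory");

	if (rflags & X86_EFLAGS_CF)
		return -1;	/* VMfailInvalid: no current VMCS */
	/*
	 * VMfailValid: error number is in VM_INSTRUCTION_ERROR, e.g. 4
	 * (VMLAUNCH with non-clear VMCS) from the launch_state check below.
	 */
	return vmcs_read32(VM_INSTRUCTION_ERROR);
}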

--- .before/arch/x86/kvm/vmx.c	2011-05-08 10:43:20.000000000 +0300
+++ .after/arch/x86/kvm/vmx.c	2011-05-08 10:43:20.000000000 +0300
@@ -346,6 +346,9 @@ struct nested_vmx {
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
+
+	/* Saving the VMCS that we used for running L1 */
+	struct saved_vmcs saved_vmcs01;
 	u64 vmcs01_tsc_offset;
 	/*
 	 * Guest pages referred to in vmcs02 with host-physical pointers, so
@@ -4880,6 +4883,21 @@ static int handle_vmclear(struct kvm_vcp
 	return 1;
 }
 
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
+
+/* Emulate the VMLAUNCH instruction */
+static int handle_vmlaunch(struct kvm_vcpu *vcpu)
+{
+	return nested_vmx_run(vcpu, true);
+}
+
+/* Emulate the VMRESUME instruction */
+static int handle_vmresume(struct kvm_vcpu *vcpu)
+{
+
+	return nested_vmx_run(vcpu, false);
+}
+
 enum vmcs_field_type {
 	VMCS_FIELD_TYPE_U16 = 0,
 	VMCS_FIELD_TYPE_U64 = 1,
@@ -5160,11 +5178,11 @@ static int (*kvm_vmx_exit_handlers[])(st
 	[EXIT_REASON_INVLPG]		      = handle_invlpg,
 	[EXIT_REASON_VMCALL]		      = handle_vmcall,
 	[EXIT_REASON_VMCLEAR]		      = handle_vmclear,
-	[EXIT_REASON_VMLAUNCH]		      = handle_vmx_insn,
+	[EXIT_REASON_VMLAUNCH]		      = handle_vmlaunch,
 	[EXIT_REASON_VMPTRLD]		      = handle_vmptrld,
 	[EXIT_REASON_VMPTRST]		      = handle_vmptrst,
 	[EXIT_REASON_VMREAD]		      = handle_vmread,
-	[EXIT_REASON_VMRESUME]		      = handle_vmx_insn,
+	[EXIT_REASON_VMRESUME]		      = handle_vmresume,
 	[EXIT_REASON_VMWRITE]		      = handle_vmwrite,
 	[EXIT_REASON_VMOFF]		      = handle_vmoff,
 	[EXIT_REASON_VMON]		      = handle_vmon,
@@ -6021,6 +6039,123 @@ static int prepare_vmcs02(struct kvm_vcp
 	return 0;
 }
 
+/*
+ * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
+ * for running an L2 nested guest.
+ */
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+{
+	struct vmcs12 *vmcs12;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int cpu;
+	struct saved_vmcs *saved_vmcs02;
+	u32 low, high;
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+	skip_emulated_instruction(vcpu);
+
+	/*
+	 * The nested entry process starts with enforcing various prerequisites
+	 * on vmcs12 as required by the Intel SDM, acting appropriately when
+	 * they fail: as the SDM explains, some conditions should make the
+	 * instruction fail outright, while others make it seem to succeed but
+	 * then cause an EXIT_REASON_INVALID_STATE exit.
+	 * To speed up the normal (success) code path, we avoid checking here
+	 * for misconfigurations that will be caught by the processor anyway
+	 * when it uses the merged vmcs02.
+	 */
+
+	vmcs12 = get_vmcs12(vcpu);
+	if (vmcs12->launch_state == launch) {
+		nested_vmx_failValid(vcpu,
+			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
+			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
+		return 1;
+	}
+
+	if (vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_MOV_SS) {
+		nested_vmx_failValid(vcpu,
+				     VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
+		return 1;
+	}
+
+	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
+	    !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
+		/* TODO: also verify bits beyond physical address width are 0 */
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	if (vmcs12->vm_entry_msr_load_count > 0 ||
+	    vmcs12->vm_exit_msr_load_count > 0 ||
+	    vmcs12->vm_exit_msr_store_count > 0) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING
+			  "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__);
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	nested_vmx_pinbased_ctls(&low, &high);
+	if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control, low, high)) {
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+	    ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+		nested_vmx_failValid(vcpu,
+				     VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+		return 1;
+	}
+
+	/*
+	 * We are finally done with the prerequisite checks and can proceed
+	 * with the nested entry itself.
+	 */
+
+	enter_guest_mode(vcpu);
+
+	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
+
+	/*
+	 * Switch from L1's VMCS (vmcs01) to L2's VMCS (vmcs02). Remember
+	 * vmcs01, the CPU it was last loaded on, and whether it was launched
+	 * (we will need all these the next time we run L1), and recall these
+	 * values from the last time vmcs02 was used.
+	 */
+	saved_vmcs02 = nested_get_current_vmcs02(vmx);
+	if (!saved_vmcs02)
+		return -ENOMEM;
+
+	cpu = get_cpu();
+	vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
+	vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
+	vmx->nested.saved_vmcs01.launched = vmx->launched;
+
+	vmx->vmcs = saved_vmcs02->vmcs;
+	vcpu->cpu = saved_vmcs02->cpu;
+	vmx->launched = saved_vmcs02->launched;
+
+	vmx_vcpu_put(vcpu);
+	vmx_vcpu_load(vcpu, cpu);
+	vcpu->cpu = cpu;
+	put_cpu();
+
+	vmcs12->launch_state = 1;
+
+	prepare_vmcs02(vcpu, vmcs12);
+
+	/*
+	 * Note no nested_vmx_succeed() or nested_vmx_fail*() here: at this
+	 * point we are no longer running L1, and as far as L1 is concerned
+	 * VMLAUNCH/VMRESUME has not yet returned. It will only return (and
+	 * set the success flag) when L2 exits (see nested_vmx_vmexit()).
+	 */
+	return 1;
+}
+
 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage)
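
A second note, also not part of the patch: the pin-based control check
above relies on the SDM's allowed-0/allowed-1 convention, where
nested_vmx_pinbased_ctls() is expected to return the two halves of the
IA32_VMX_PINBASED_CTLS MSR: "low" holds the bits that must be 1, "high"
the bits that are allowed to be 1. The helper below is a standalone model
written to mirror the test that vmx_control_verify() performs;
control_verify() itself is illustrative, not kernel code.

#include <stdbool.h>
#include <stdint.h>

/*
 * A control value is valid iff every must-be-one bit (low) is set in it
 * and it sets no bit outside the may-be-one mask (high): masking with
 * high and OR-ing low back in reproduces the value only in that case.
 */
static bool control_verify(uint32_t control, uint32_t low, uint32_t high)
{
	return ((control & high) | low) == control;
}

This is why, for example, an L1 that sets a reserved bit in
vmcs12->pin_based_vm_exec_control sees its nested entry fail with
VMXERR_ENTRY_INVALID_CONTROL_FIELD instead of KVM attempting the entry.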