Re: [PATCH 19/28] nVMX: Exiting from L2 to L1

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/08/2010 07:09 PM, Nadav Har'El wrote:
This patch implements nested_vmx_vmexit(), called when the nested L2 guest
exits and we want to run its L1 parent and let it handle this exit.

Note that this will not necessarily be called on every L2 exit. L0 may decide
to handle a particular exit on its own, without L1's involvement; In that
case, L0 will handle the exit, and resume running L2, without running L1 and
without calling nested_vmx_vmexit(). The logic for deciding whether to handle
a particular exit in L1 or in L0, i.e., whether to call nested_vmx_vmexit(),
will appear in the next patch.


+void prepare_vmcs12(struct kvm_vcpu *vcpu)
+{
+	struct vmcs_fields *vmcs12 = get_vmcs12_fields(vcpu);
+
+	/* update guest state fields: */
+	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
+	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
+
+	vmcs12->guest_dr7 = vmcs_readl(GUEST_DR7);
+	vmcs12->guest_rsp = vmcs_readl(GUEST_RSP);
+	vmcs12->guest_rip = vmcs_readl(GUEST_RIP);
+	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);

kvm_register_read() etc.

+
+static int nested_vmx_vmexit(struct kvm_vcpu *vcpu, bool is_interrupt)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int efer_offset;
+	struct vmcs_fields *vmcs01 = vmx->nested.vmcs01_fields;
+
+	if (!is_guest_mode(vcpu)) {
+		printk(KERN_INFO "WARNING: %s called but not in nested mode\n",
+		       __func__);
+		return 0;
+	}
+
+	sync_cached_regs_to_vmcs(vcpu);
+
+	prepare_vmcs12(vcpu);
+
+	if (is_interrupt)
+		get_vmcs12_fields(vcpu)->vm_exit_reason =
+			EXIT_REASON_EXTERNAL_INTERRUPT;
+
+	vmx->nested.current_vmcs12->launched = vmx->launched;
+	vmx->nested.current_vmcs12->cpu = vcpu->cpu;
+
+	vmx->vmcs = vmx->nested.vmcs01;
+	vcpu->cpu = vmx->nested.l1_state.cpu;
+	vmx->launched = vmx->nested.l1_state.launched;
+
+	leave_guest_mode(vcpu);
+
+	vmx_vcpu_load(vcpu, get_cpu());
+	put_cpu();
+
+	vcpu->arch.efer = vmx->nested.l1_state.efer;
+	if ((vcpu->arch.efer&  EFER_LMA)&&
+	    !(vcpu->arch.efer&  EFER_SCE))
+		vcpu->arch.efer |= EFER_SCE;

set_efer() in x86.c for the side effects.

+
+	efer_offset = __find_msr_index(vmx, MSR_EFER);
+	if (update_transition_efer(vmx, efer_offset))
+		wrmsrl(MSR_EFER, vmx->guest_msrs[efer_offset].data);

Including this.

+
+	/*
+	 * L2 perhaps switched to real mode and set vmx->rmode, but we're back
+	 * in L1 and as it is running VMX, it can't be in real mode.
+	 */
+	vmx->rmode.vm86_active = 0;

L2 cannot be in real mode since vmx does not support it (except for unrestricted guest, in which case rmode.vm86_active would be clear).

+
+	/*
+	 * If L1 set the HOST_* fields in the VMCS, when exiting from L2 to L1
+	 * we need to return those, not L1's old values.
+	 */
+	vmcs_writel(GUEST_RIP, get_vmcs12_fields(vcpu)->host_rip);
+	vmcs_writel(GUEST_RSP, get_vmcs12_fields(vcpu)->host_rsp);

kvm_register_write() etc.

+	vmcs01->cr0_read_shadow = get_vmcs12_fields(vcpu)->host_cr0;
+
+	/*
+	 * We're running a regular L1 guest again, so we do the regular KVM
+	 * thing: run vmx_set_cr0 with the cr0 bits the guest thinks it has.
+	 * vmx_set_cr0 might use slightly different bits on the new guest_cr0
+	 * it sets, e.g., add TS when !fpu_active.
+	 * Note that vmx_set_cr0 refers to rmode and efer set above.
+	 */
+	vmx_set_cr0(vcpu, guest_readable_cr0(vmcs01));

kvm_set_cr0() takes care of some extra stuff. Why guest_readable_cr0? want vmcs12->host_cr0.

+	/*
+	 * If we did fpu_activate()/fpu_deactive() during l2's run, we need to
+	 * apply the same changes to l1's vmcs. We just set cr0 correctly, but
+	 * now we need to also update cr0_guest_host_mask and exception_bitmap.
+	 */
+	vmcs_write32(EXCEPTION_BITMAP,
+		(vmcs01->exception_bitmap&  ~(1u<<NM_VECTOR)) |
+			(vcpu->fpu_active ? 0 : (1u<<NM_VECTOR)));
+	vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0);
+	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);

Should be a side effect of kvm_set_cr0().

+
+	vmx_set_cr4(vcpu, guest_readable_cr4(vmcs01));
+	vcpu->arch.cr4_guest_owned_bits = ~vmcs01->cr4_guest_host_mask;

kvm_set_cr4(vmcs12->host_cr4)

+
+	if (enable_ept) {
+		/* shadow page tables on EPT: */
+		set_cr3_and_pdptrs(vcpu, get_vmcs12_fields(vcpu)->host_cr3);
+	} else {
+		/* shadow page tables on shadow page tables: */
+		kvm_set_cr3(vcpu, vmx->nested.l1_arch_cr3);
+		kvm_mmu_reset_context(vcpu);
+		kvm_mmu_load(vcpu);
+	}

kvm_set_cr3() should suffice in both cases. kvm_mmu_reset_context()/kvm_mmu_load() is probably unneeded.

+
+	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs01->guest_rsp);
+	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs01->guest_rip);

vmcs12->host_rip

+
+	if (unlikely(vmx->fail)) {
+		/*
+		 * When L1 launches L2 and then we (L0) fail to launch L2,
+		 * we nested_vmx_vmexit back to L1, but now should let it know
+		 * that the VMLAUNCH failed - with the same error that we
+		 * got when launching L2.
+		 */
+		vmx->fail = 0;
+		nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
+	} else
+		nested_vmx_succeed(vcpu);
+
+	return 0;
+}
+
  static struct kvm_x86_ops vmx_x86_ops = {
  	.cpu_has_kvm_support = cpu_has_kvm_support,
  	.disabled_by_bios = vmx_disabled_by_bios,


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux