Re: [PATCH 3/5] Nested VMX patch 3 implements vmptrld and vmptrst

Avi Kivity <avi@xxxxxxxxxx> · Tue, 20 Oct 2009 13:24:33 +0900




On 10/15/2009 11:41 PM, oritw@xxxxxxxxxx wrote:

+
+struct __attribute__ ((__packed__)) shadow_vmcs {
   

Since this is in guest memory, we need it packed so the binary format is 
preserved across migration.  Please add a comment so it isn't changed 
(at least without changing the revision_id).

vmclear state should be here, that will help multiguest support.


  struct nested_vmx {
  	/* Has the level1 guest done vmxon? */
  	bool vmxon;
-
+	/* What is the location of the  vmcs l1 keeps for l2? (in level1 gpa) */
+	u64 vmptr;
   

Need to expose it for live migration.

  	/*
  	 * Level 2 state : includes vmcs,registers and
  	 * a copy of vmcs12 for vmread/vmwrite
  	 */
  	struct level_state *l2_state;
+	/* Level 1 state for switching to level 2 and back */
+	struct level_state *l1_state;
   

This creates a ton of duplication.

Some of the data is completely unnecessary, for example we can 
recalculate cr0 from HOST_CR0 and GUEST_CR0.

+
+static int vmptrld(struct kvm_vcpu *vcpu,
+		   u64 phys_addr)
+{
+	u8 error;
+
+	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
+		      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+		      : "cc");
+	if (error) {
+		printk(KERN_ERR "kvm: %s vmptrld %llx failed\n",
+		       __func__, phys_addr);
+		return 1;
+	}
+
+	return 0;
+}
+
  /*
   * Switches to specified vcpu, until a matching vcpu_put(), but assumes
   * vcpu mutex is already taken.
@@ -736,15 +923,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  	}

  	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
-		u8 error;
-
  		per_cpu(current_vmcs, cpu) = vmx->vmcs;
-		asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
-			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
-			      : "cc");
-		if (error)
-			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-			       vmx->vmcs, phys_addr);
+		vmptrld(vcpu, phys_addr);
  	}
   

This part of the patch is no longer needed.
+	if (cpu_has_vmx_msr_bitmap())
+		vmx->nested.l2_state->msr_bitmap = vmcs_read64(MSR_BITMAP);
+	else
+		vmx->nested.l2_state->msr_bitmap = 0;
+
+	vmx->nested.l2_state->io_bitmap_a = vmcs_read64(IO_BITMAP_A);
+	vmx->nested.l2_state->io_bitmap_b = vmcs_read64(IO_BITMAP_B);
+
   

This no longer works, since we don't load the guest vmcs.

+int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+			struct kvm_vcpu *vcpu);
   

Isn't this in a header somewhere?

+
+int read_guest_vmcs_gpa(struct kvm_vcpu *vcpu, u64 *gentry)
+{
+
+	int r = 0;
+
+	r = kvm_read_guest_virt(vcpu->arch.regs[VCPU_REGS_RAX], gentry,
+				sizeof(u64), vcpu);
+	if (r) {
+		printk(KERN_ERR "%s cannot read guest vmcs addr %lx : %d\n",
+		       __func__, vcpu->arch.regs[VCPU_REGS_RAX], r);
+		return r;
+	}
+
+	if (!IS_ALIGNED(*gentry, PAGE_SIZE)) {
+		printk(KERN_DEBUG "%s addr %llx not aligned\n",
+		       __func__, *gentry);
+		return 1;
+	}
+
+	return 0;
+}
+
   

Should go through the emulator to evaluate arguments.

+static int handle_vmptrld(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct page *vmcs_page;
+	u64 guest_vmcs_addr;
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (read_guest_vmcs_gpa(vcpu,&guest_vmcs_addr))
+		return 1;
+
+	if (create_l1_state(vcpu)) {
+		printk(KERN_ERR "%s create_l1_state failed\n", __func__);
+		return 1;
+	}
+
+	if (create_l2_state(vcpu)) {
+		printk(KERN_ERR "%s create_l2_state failed\n", __func__);
+		return 1;
+	}
   

return errors here, so we see the problem.

+
+static int handle_vmptrst(struct kvm_vcpu *vcpu)
+{
+	int r = 0;
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	r = kvm_write_guest_virt(vcpu->arch.regs[VCPU_REGS_RAX],
+				 (void *)&to_vmx(vcpu)->nested.vmptr,
+				 sizeof(u64), vcpu);
   

Emulator again.

+void save_vmcs(struct shadow_vmcs *dst)
+{

+	dst->io_bitmap_a = vmcs_read64(IO_BITMAP_A);
+	dst->io_bitmap_b = vmcs_read64(IO_BITMAP_B);
   

These (and many others) can never change due to a nested guest running, 
so no need to save them.

+	dst->virtual_apic_page_addr = vmcs_read64(VIRTUAL_APIC_PAGE_ADDR);
   

In general, you need to translate host physical addresses to guest 
physical addresses.

+	dst->apic_access_addr = vmcs_read64(APIC_ACCESS_ADDR);
+	if (enable_ept)
+		dst->ept_pointer = vmcs_read64(EPT_POINTER);
+
   

Not all hosts support these features.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html