[RFC PATCH part-2 08/17] pkvm: x86: Initialize vmcs guest state area for host vcpu

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



After deprivileging, the host OS shall continue running with the state it had
at that point, so the CPU simply moves its context from VMX root mode to
non-root mode. The VMCS guest state of the host vcpu is just the current
state of the physical CPU, so initialize it directly from the native state.

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx>
Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/kvm/vmx/pkvm/pkvm_host.c | 114 +++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/pkvm/pkvm_host.c b/arch/x86/kvm/vmx/pkvm/pkvm_host.c
index 272205977e1e..5ed64ed2a801 100644
--- a/arch/x86/kvm/vmx/pkvm/pkvm_host.c
+++ b/arch/x86/kvm/vmx/pkvm/pkvm_host.c
@@ -136,7 +136,112 @@ static __init int pkvm_enable_vmx(struct pkvm_host_vcpu *vcpu)
 	return pkvm_cpu_vmxon(phys_addr);
 }
 
-static __init int pkvm_host_init_vmx(struct pkvm_host_vcpu *vcpu)
+/*
+ * get_ar - build the VMCS access-rights value for a segment selector
+ * @sel: selector to look up in the descriptor tables of the running CPU
+ *
+ * LAR places the descriptor's access rights in bits 8-23 of its destination.
+ * The VMCS *_AR_BYTES fields want them starting at bit 0, with bits 8-11
+ * cleared, hence the shift by 8 and the 0xf0ff mask.
+ *
+ * LAR clears ZF and leaves its destination untouched when it rejects the
+ * selector (null selector, index beyond the table limit, descriptor type not
+ * valid for LAR, ...). Without a check, the returned value would be derived
+ * from the selector bits still sitting in RAX. Rejected selectors are
+ * therefore reported as unusable (bit 16 set), the same value already used
+ * for a zero selector.
+ *
+ * Return: access rights in VMCS layout, or 0x10000 if @sel is null or is
+ * rejected by LAR.
+ */
+static inline u32 get_ar(u16 sel)
+{
+	u32 access_rights;
+	u8 valid;
+
+	/* Selectors 0-3 (index 0, TI clear, any RPL) are all null selectors. */
+	if ((sel & ~0x3) == 0)
+		return 0x10000;
+
+	/* setz captures ZF right after LAR: 1 = descriptor accepted. */
+	asm ("lar %%ax, %%rax\n\t"
+	     "setz %1"
+	     : "=a"(access_rights), "=qm"(valid)
+	     : "a"(sel)
+	     : "cc");
+
+	if (!valid)
+		return 0x10000;
+
+	return (access_rights >> 8) & 0xf0ff;
+}
+
+/*
+ * init_guestsegment - copy one live segment register into the VMCS guest area
+ * @seg:   lower-case register name handed to savesegment() (cs, ss, ds, ...)
+ * @SEG:   upper-case name pasted into the GUEST_<SEG>_* field identifiers
+ * @base:  value written to GUEST_<SEG>_BASE
+ * @limit: value written to GUEST_<SEG>_LIMIT
+ *
+ * The selector is read from the running CPU and its access rights are derived
+ * with get_ar(). The macro's temporaries carry a "__" prefix and the value
+ * arguments are parenthesized so that a caller expression naming its own
+ * "sel" or "ar" variable is not captured by the macro body.
+ */
+#define init_guestsegment(seg, SEG, base, limit)		\
+	do  {							\
+		u16 __sel;					\
+		u32 __ar;					\
+								\
+		savesegment(seg, __sel);			\
+		__ar = get_ar(__sel);				\
+		vmcs_write16(GUEST_##SEG##_SELECTOR, __sel);	\
+		vmcs_write32(GUEST_##SEG##_AR_BYTES, __ar);	\
+		vmcs_writel(GUEST_##SEG##_BASE, (base));	\
+		vmcs_write32(GUEST_##SEG##_LIMIT, (limit));	\
+	} while (0)
+
+/*
+ * init_guest_state_area_from_native - fill the VMCS guest register state
+ * from the registers and MSRs of the CPU executing this function.
+ * @cpu: CPU index, used only to locate the entry-area TSS for the TR base.
+ *       NOTE(review): presumably must be the CPU this runs on, since every
+ *       other value is read from the running CPU - confirm against caller.
+ *
+ * Writes go to whichever VMCS is current; this function does not load one.
+ */
+static __init void init_guest_state_area_from_native(int cpu)
+{
+	u16 ldtr;
+	struct desc_ptr dt;
+	unsigned long msrl;
+	u32 high, low;
+
+	/* load CR registers; CR0.TS is masked off in the guest copy */
+	vmcs_writel(GUEST_CR0, read_cr0() & ~X86_CR0_TS);
+	vmcs_writel(GUEST_CR3, __read_cr3());
+	vmcs_writel(GUEST_CR4, native_read_cr4());
+
+	/* load cs/ss/ds/es: live selector, base 0, limit 0xffffffff */
+	init_guestsegment(cs, CS, 0x0, 0xffffffff);
+	init_guestsegment(ss, SS, 0x0, 0xffffffff);
+	init_guestsegment(ds, DS, 0x0, 0xffffffff);
+	init_guestsegment(es, ES, 0x0, 0xffffffff);
+
+	/* load fs/gs; their bases are taken from the FS_BASE/GS_BASE MSRs */
+	rdmsrl(MSR_FS_BASE, msrl);
+	init_guestsegment(fs, FS, msrl, 0xffffffff);
+	rdmsrl(MSR_GS_BASE, msrl);
+	init_guestsegment(gs, GS, msrl, 0xffffffff);
+
+	/* load GDTR */
+	native_store_gdt(&dt);
+	vmcs_writel(GUEST_GDTR_BASE, dt.address);
+	vmcs_write32(GUEST_GDTR_LIMIT, dt.size);
+
+	/*
+	 * load TR: the selector is the fixed GDT_ENTRY_TSS slot rather than a
+	 * value read back from the CPU, and the base is the x86_tss inside
+	 * the cpu entry area of @cpu.
+	 */
+	vmcs_write16(GUEST_TR_SELECTOR, GDT_ENTRY_TSS*8);
+	vmcs_write32(GUEST_TR_AR_BYTES, get_ar(GDT_ENTRY_TSS*8));
+	vmcs_writel(GUEST_TR_BASE, (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
+	vmcs_write32(GUEST_TR_LIMIT, __KERNEL_TSS_LIMIT);
+
+	/*
+	 * load LDTR: the selector is read from the CPU, but the access rights
+	 * are always written as 0x10000 (the value get_ar() uses for a null
+	 * selector) with base 0.
+	 * NOTE(review): this looks like it assumes no LDT is loaded at this
+	 * point (ldtr == 0) - confirm.
+	 */
+	store_ldt(ldtr);
+	vmcs_write16(GUEST_LDTR_SELECTOR, ldtr);
+	vmcs_write32(GUEST_LDTR_AR_BYTES, 0x10000);
+	vmcs_writel(GUEST_LDTR_BASE, 0x0);
+	vmcs_write32(GUEST_LDTR_LIMIT, 0xffffffff);
+
+	/* load IDTR */
+	store_idt(&dt);
+	vmcs_writel(GUEST_IDTR_BASE, dt.address);
+	vmcs_write32(GUEST_IDTR_LIMIT, dt.size);
+
+	/* set MSRs; DEBUGCTL is written as 0 instead of being read back */
+	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+	/* only the low 32 bits of SYSENTER_CS go into the VMCS field */
+	rdmsr(MSR_IA32_SYSENTER_CS, low, high);
+	vmcs_write32(GUEST_SYSENTER_CS, low);
+
+	rdmsrl(MSR_IA32_SYSENTER_ESP, msrl);
+	vmcs_writel(GUEST_SYSENTER_ESP, msrl);
+
+	rdmsrl(MSR_IA32_SYSENTER_EIP, msrl);
+	vmcs_writel(GUEST_SYSENTER_EIP, msrl);
+
+	rdmsrl(MSR_EFER, msrl);
+	vmcs_write64(GUEST_IA32_EFER, msrl);
+
+	rdmsrl(MSR_IA32_CR_PAT, msrl);
+	vmcs_write64(GUEST_IA32_PAT, msrl);
+}
+
+/*
+ * init_guest_state_area - initialize the guest-state area of the current VMCS
+ * @vcpu: host vcpu being set up (not referenced by this function's body)
+ * @cpu:  CPU index forwarded to init_guest_state_area_from_native()
+ *
+ * Register state is copied from the running CPU first; the remaining fields
+ * written below are set to fixed values.
+ */
+static __init void init_guest_state_area(struct pkvm_host_vcpu *vcpu, int cpu)
+{
+	init_guest_state_area_from_native(cpu);
+
+	/*
+	 * Guest non-register state: active, no interruptibility restrictions,
+	 * no pending debug exceptions, and an all-ones VMCS link pointer.
+	 */
+	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
+	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+	vmcs_write64(VMCS_LINK_POINTER, -1ull);
+}
+
+static __init int pkvm_host_init_vmx(struct pkvm_host_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = &vcpu->vmx;
 	int ret;
@@ -156,6 +261,11 @@ static __init int pkvm_host_init_vmx(struct pkvm_host_vcpu *vcpu)
 		return -ENOMEM;
 	}
 
+	vmx->loaded_vmcs = &vmx->vmcs01;
+	vmcs_load(vmx->loaded_vmcs->vmcs);
+
+	init_guest_state_area(vcpu, cpu);
+
 	return ret;
 }
 
@@ -339,7 +449,7 @@ static __init void pkvm_host_deprivilege_cpu(void *data)
 
 	enable_feature_control();
 
-	ret = pkvm_host_init_vmx(vcpu);
+	ret = pkvm_host_init_vmx(vcpu, cpu);
 	if (ret) {
 		pr_err("%s: init vmx failed\n", __func__);
 		goto out;
-- 
2.25.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux