[RFC PATCH part-5 18/22] pkvm: x86: Add VMREAD/VMWRITE emulation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Provide emulation for the VMREAD and VMWRITE VMX instructions.

A VMREAD/VMWRITE of a non-shadowing vmcs field from the host VM causes a
vmexit. Add vmexit handlers to manage these non-shadowing vmcs fields,
which fall into two groups:
- emulated fields: recorded in cached_vmcs12, with dirty_vmcs12 set to
  indicate that emulation is needed before vmcs02 takes effect.
- host state fields: recorded in cached_vmcs12 and restored as guest
  state for vmcs01 when returning to the host VM.

Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/include/asm/pkvm_image_vars.h |   3 +-
 arch/x86/kvm/vmx/pkvm/hyp/nested.c     | 138 +++++++++++++++++++++++++
 arch/x86/kvm/vmx/pkvm/hyp/nested.h     |   2 +
 arch/x86/kvm/vmx/pkvm/hyp/vmexit.c     |  10 ++
 4 files changed, 152 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pkvm_image_vars.h b/arch/x86/include/asm/pkvm_image_vars.h
index 598c60302bac..967ee323a5c0 100644
--- a/arch/x86/include/asm/pkvm_image_vars.h
+++ b/arch/x86/include/asm/pkvm_image_vars.h
@@ -16,7 +16,8 @@ PKVM_ALIAS(sme_me_mask);
 #endif
 
 PKVM_ALIAS(__default_kernel_pte_mask);
-
+PKVM_ALIAS(vmcs12_field_offsets);
+PKVM_ALIAS(nr_vmcs12_fields);
 #endif
 
 #endif
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/nested.c b/arch/x86/kvm/vmx/pkvm/hyp/nested.c
index dab002ff3c68..fd8755621cc8 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/nested.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/nested.c
@@ -229,6 +229,18 @@ static bool is_host_fields(unsigned long field)
 	return (((field) >> 10U) & 0x3U) == 3U;
 }
 
+/* Report whether @field_encoding is listed in the emulated_fields[] table. */
+static bool is_emulated_fields(unsigned long field_encoding)
+{
+	bool found = false;
+	int idx;
+
+	for (idx = 0; !found && idx < max_emulated_fields; idx++)
+		found = (unsigned long)emulated_fields[idx].encoding == field_encoding;
+
+	return found;
+}
+
 static void nested_vmx_result(enum VMXResult result, int error_number)
 {
 	u64 rflags = vmcs_readl(GUEST_RFLAGS);
@@ -671,6 +683,132 @@ int handle_vmclear(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+/*
+ * Emulate VMWRITE from the host VM into the cached vmcs12; fields that are
+ * neither emulated nor host-state fail without modifying vmcs12.  Returns 0
+ * once emulated, 1 if get_vmx_mem_address() fails, or the read_gva() error.
+ */
+int handle_vmwrite(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct shadow_vcpu_state *shadow = to_pkvm_hvcpu(vcpu)->current_shadow_vcpu;
+	struct vmcs12 *vmcs12 = (struct vmcs12 *)shadow->cached_vmcs12;
+	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	struct x86_exception e;
+	unsigned long field;
+	bool emulated;
+	short offset;
+	gva_t gva;
+	u64 value = 0;
+	int r;
+
+	if (!check_vmx_permission(vcpu))
+		return 0;
+
+	if (vmx->nested.current_vmptr == INVALID_GPA) {
+		nested_vmx_result(VMfailInvalid, 0);
+		return 0;
+	}
+
+	if (instr_info & BIT(10)) {
+		value = vcpu->arch.regs[(instr_info >> 3) & 0xf];
+	} else {
+		if (get_vmx_mem_address(vcpu, vmx->exit_qualification, instr_info, &gva))
+			return 1;
+		/*TODO: handle memory failure exception */
+		r = read_gva(vcpu, gva, &value, 8, &e);
+		if (r < 0)
+			return r;
+	}
+
+	/* Bits 31:28 select the register that holds the field encoding. */
+	field = vcpu->arch.regs[(instr_info >> 28) & 0xf];
+	offset = get_vmcs12_field_offset(field);
+	if (offset < 0) {
+		nested_vmx_result(VMfailInvalid, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+		return 0;
+	}
+
+	/*TODO: check vcpu supports "VMWRITE to any supported field in the VMCS"*/
+	if (vmcs_field_readonly(field)) {
+		nested_vmx_result(VMfailInvalid, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
+		return 0;
+	}
+
+	/* Validate first so that a failed VMWRITE leaves the cached vmcs12 intact. */
+	emulated = is_emulated_fields(field);
+	if (!emulated && !is_host_fields(field)) {
+		pkvm_err("%s: not include emulated fields 0x%lx, please add!\n",
+			 __func__, field);
+		nested_vmx_result(VMfailInvalid, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+		return 0;
+	}
+
+	/*
+	 * Some Intel CPUs intentionally drop the reserved bits of the AR byte
+	 * fields on VMWRITE.  Emulate this behavior so that the stored value is
+	 * consistent regardless of the underlying hardware.
+	 */
+	if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
+		value &= 0x1f0ff;
+
+	vmcs12_write_any(vmcs12, field, offset, value);
+	/* Emulated fields must be re-synced before vmcs02 takes effect. */
+	if (emulated)
+		vmx->nested.dirty_vmcs12 = true;
+	nested_vmx_result(VMsucceed, 0);
+	return 0;
+}
+
+/*
+ * Emulate VMREAD from the host VM: copy a field of the cached vmcs12 to the
+ * register or memory destination.  Returns 0 once emulated, 1 when
+ * get_vmx_mem_address() fails, or the negative write_gva() error.
+ */
+int handle_vmread(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct shadow_vcpu_state *shadow = to_pkvm_hvcpu(vcpu)->current_shadow_vcpu;
+	struct vmcs12 *vmcs12 = (struct vmcs12 *)shadow->cached_vmcs12;
+	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	struct x86_exception e;
+	unsigned long field;
+	short offset;
+	gva_t gva = 0;
+	u64 value;
+	int r;
+
+	if (!check_vmx_permission(vcpu))
+		return 0;
+
+	if (vmx->nested.current_vmptr == INVALID_GPA) {
+		nested_vmx_result(VMfailInvalid, 0);
+		return 0;
+	}
+
+	/* Bits 31:28 of the instruction info pick the register holding the field. */
+	field = vcpu->arch.regs[(instr_info >> 28) & 0xf];
+	offset = get_vmcs12_field_offset(field);
+	if (offset < 0) {
+		nested_vmx_result(VMfailInvalid, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+		return 0;
+	}
+	value = vmcs12_read_any(vmcs12, field, offset);
+	if (instr_info & BIT(10)) {
+		/* Register destination, picked by bits 6:3. */
+		vcpu->arch.regs[(instr_info >> 3) & 0xf] = value;
+	} else {
+		if (get_vmx_mem_address(vcpu, vmx->exit_qualification, instr_info, &gva))
+			return 1;
+		/*TODO: handle memory failure exception */
+		r = write_gva(vcpu, gva, &value, 8, &e);
+		if (r < 0)
+			return r;
+	}
+	nested_vmx_result(VMsucceed, 0);
+	return 0;
+}
+
 void pkvm_init_nest(void)
 {
 	init_vmcs_shadow_fields();
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/nested.h b/arch/x86/kvm/vmx/pkvm/hyp/nested.h
index a228b0fdc15d..5fc76bdb135a 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/nested.h
+++ b/arch/x86/kvm/vmx/pkvm/hyp/nested.h
@@ -9,6 +9,8 @@ int handle_vmxon(struct kvm_vcpu *vcpu);
 int handle_vmxoff(struct kvm_vcpu *vcpu);
 int handle_vmptrld(struct kvm_vcpu *vcpu);
 int handle_vmclear(struct kvm_vcpu *vcpu);
+int handle_vmwrite(struct kvm_vcpu *vcpu);
+int handle_vmread(struct kvm_vcpu *vcpu);
 void pkvm_init_nest(void);
 
 #endif
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c b/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c
index b2cfb87983a8..d4f2a408e6e9 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c
@@ -216,6 +216,16 @@ int pkvm_main(struct kvm_vcpu *vcpu)
 			handle_vmclear(vcpu);
 			skip_instruction = true;
 			break;
+		case EXIT_REASON_VMREAD:
+			pkvm_dbg("CPU%d vmexit reason: VMREAD.\n", vcpu->cpu);
+			handle_vmread(vcpu);
+			skip_instruction = true;
+			break;
+		case EXIT_REASON_VMWRITE:
+			pkvm_dbg("CPU%d vmexit reason: VMWRITE.\n", vcpu->cpu);
+			handle_vmwrite(vcpu);
+			skip_instruction = true;
+			break;
 		case EXIT_REASON_XSETBV:
 			handle_xsetbv(vcpu);
 			skip_instruction = true;
-- 
2.25.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux