[RFC PATCH part-3 22/22] pkvm: x86: Dynamically handle host MMIO EPT violation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx>

The host EPT is prepopulated for all memory and for MMIO in the low
address range, but not for MMIO in the high address range. When the host
VM accesses such MMIO, an EPT violation occurs and pKVM shall create the
mapping if the MMIO range belongs to the host VM, which means the MMIO
must not belong to a pKVM-owned device (e.g. the IOMMU) or to a device
passed through to a protected VM (these checks will be added in the future).

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx>
Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/kvm/vmx/pkvm/hyp/ept.c    | 64 ++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/pkvm/hyp/ept.h    |  1 +
 arch/x86/kvm/vmx/pkvm/hyp/vmexit.c |  5 +++
 3 files changed, 70 insertions(+)

diff --git a/arch/x86/kvm/vmx/pkvm/hyp/ept.c b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
index 10d226d3ec59..b0a542b47e83 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/ept.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/memblock.h>
 #include <asm/kvm_pkvm.h>
+#include <asm/pkvm_spinlock.h>
 #include <mmu.h>
 #include <mmu/spte.h>
 
@@ -16,9 +17,12 @@
 #include "early_alloc.h"
 #include "pgtable.h"
 #include "ept.h"
+#include "memory.h"
+#include "debug.h"
 
 static struct hyp_pool host_ept_pool;
 static struct pkvm_pgtable host_ept;
+static pkvm_spinlock_t host_ept_lock = __PKVM_SPINLOCK_UNLOCKED;
 
 static void flush_tlb_noop(void) { };
 static void *host_ept_zalloc_page(void)
@@ -157,3 +161,63 @@ int pkvm_host_ept_init(struct pkvm_pgtable_cap *cap,
 	pkvm_hyp->host_vm.ept = &host_ept;
 	return pkvm_pgtable_init(&host_ept, &host_ept_mm_ops, &ept_ops, cap, true);
 }
+
+int handle_host_ept_violation(unsigned long gpa)	/* map host MMIO at @gpa on demand */
+{
+	unsigned long hpa;
+	struct mem_range range, cur;	/* cur: aligned candidate range to map */
+	bool is_memory = find_mem_range(gpa, &range);	/* range is used below even when false */
+	u64 prot = HOST_EPT_DEF_MMIO_PROT;
+	int level;	/* output of pkvm_pgtable_lookup() below */
+	int ret;
+
+	if (is_memory) {	/* only non-memory (MMIO) faults are handled here */
+		pkvm_err("%s: not handle for memory address 0x%lx\n", __func__, gpa);
+		return -EPERM;
+	}
+
+	pkvm_spin_lock(&host_ept_lock);	/* held across lookup and map */
+
+	pkvm_pgtable_lookup(&host_ept, gpa, &hpa, NULL, &level);
+	if (hpa != INVALID_ADDR) {	/* presumably already mapped; verify */
+		ret = -EAGAIN;	/* NOTE(review): caller skips insn on non-zero; confirm */
+		goto out;
+	}
+
+	do {	/* try levels from the lookup level downwards */
+		unsigned long size = ept_level_to_size(level);
+
+		cur.start = ALIGN_DOWN(gpa, size);
+		cur.end = cur.start + size - 1;	/* inclusive end */
+		/*
+		 * TODO:
+		 * check if this MMIO belongs to pkvm owned devices (e.g. IOMMU)
+		 * check if this MMIO belongs to a secure VM pass-through device.
+		 */
+		if ((1 << level & host_ept.allowed_pgsz) &&
+				mem_range_included(&cur, &range))
+			break;	/* level allowed and mem_range_included() passed */
+		level--;
+	} while (level != PG_LEVEL_NONE);
+
+	if (level == PG_LEVEL_NONE) {	/* no level satisfied both checks */
+		pkvm_err("pkvm: No valid range: gpa 0x%lx, cur 0x%lx ~ 0x%lx size 0x%lx level %d\n",
+			 gpa, cur.start, cur.end, cur.end - cur.start + 1, level);
+		ret = -EPERM;
+		goto out;
+	}
+
+	pkvm_dbg("pkvm: %s: cur MMIO range 0x%lx ~ 0x%lx size 0x%lx level %d\n",
+		__func__, cur.start, cur.end, cur.end - cur.start + 1, level);
+
+	ret = pkvm_host_ept_map(cur.start, cur.start, cur.end - cur.start + 1,
+			   1 << level, prot);	/* cur.start passed for both addresses */
+	if (ret == -ENOMEM) {	/* only logged; ret is still returned to the caller */
+		/* TODO: reclaim MMIO range pages first and try do map again */
+		pkvm_dbg("%s: no memory to set host ept for addr 0x%lx\n",
+			 __func__, gpa);
+	}
+out:
+	pkvm_spin_unlock(&host_ept_lock);
+	return ret;	/* -EAGAIN, -EPERM, or the pkvm_host_ept_map() result */
+}
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/ept.h b/arch/x86/kvm/vmx/pkvm/hyp/ept.h
index 43c7e418db6a..d517bf8ec169 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/ept.h
+++ b/arch/x86/kvm/vmx/pkvm/hyp/ept.h
@@ -16,5 +16,6 @@ int pkvm_host_ept_unmap(unsigned long vaddr_start, unsigned long phys_start,
 		unsigned long size);
 int pkvm_host_ept_init(struct pkvm_pgtable_cap *cap, void *ept_pool_base,
 		unsigned long ept_pool_pages);
+int handle_host_ept_violation(unsigned long gpa);
 
 #endif
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c b/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c
index c9f522f5b064..88cbd276caf8 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/vmexit.c
@@ -7,6 +7,7 @@
 #include <asm/kvm_pkvm.h>
 #include <pkvm.h>
 #include "vmexit.h"
+#include "ept.h"
 #include "debug.h"
 
 #define CR4	4
@@ -166,6 +167,10 @@ int pkvm_main(struct kvm_vcpu *vcpu)
 			vcpu->arch.regs[VCPU_REGS_RAX] = handle_vmcall(vcpu);
 			skip_instruction = true;
 			break;
+		case EXIT_REASON_EPT_VIOLATION:
+			if (handle_host_ept_violation(vmcs_read64(GUEST_PHYSICAL_ADDRESS)))
+				skip_instruction = true;
+			break;
 		default:
 			pkvm_dbg("CPU%d: Unsupported vmexit reason 0x%x.\n", vcpu->cpu, vmx->exit_reason.full);
 			skip_instruction = true;
-- 
2.25.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux