From: Brijesh Singh <brijesh.singh@xxxxxxx>

When running an SEV-SNP VM, the sPA used to index the RMP entry is
obtained through the NPT translation (gva->gpa->spa). The NPT page
level is checked against the page level programmed in the RMP entry.
If the page levels do not match, then it will cause a nested page
fault with the RMP bit set to indicate the RMP violation.

Co-developed-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Signed-off-by: Jarkko Sakkinen <jarkko@xxxxxxxxxxx>
Signed-off-by: Ashish Kalra <Ashish.Kalra@xxxxxxx>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/mmu/mmu.c             |  9 ++++++
 arch/x86/kvm/svm/sev.c             | 51 ++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c             |  2 ++
 arch/x86/kvm/svm/svm.h             |  1 +
 6 files changed, 66 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e116405cbb5f..87a087ec3277 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -135,6 +135,7 @@ KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
 KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
 KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
 KVM_X86_OP_OPTIONAL(invalidate_restricted_mem)
+KVM_X86_OP_OPTIONAL(adjust_mapping_level)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a9363a6f779d..456b42cb167b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1731,6 +1731,8 @@ struct kvm_x86_ops {
 	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
 
 	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+
+	void (*adjust_mapping_level)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 360af0c9997e..d8e5254f314d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3081,6 +3081,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
 
 out:
 	local_irq_restore(flags);
+
 	return level;
 }
 
@@ -3141,6 +3142,14 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
 						       fault->gfn, fault->max_level,
 						       fault->is_private);
+	if (kvm_slot_can_be_private(slot)) {
+		int req_level = fault->req_level;
+
+		static_call_cond(kvm_x86_adjust_mapping_level)(vcpu->kvm, fault->gfn, fault->pfn,
+							       &req_level);
+		fault->req_level = req_level;
+	}
+
 	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
 		return;
 
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 515e22d0dc30..e8740c35be39 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3749,3 +3749,54 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
 
 	return p;
 }
+
+static bool is_gfn_range_shared(struct kvm *kvm, gfn_t start, gfn_t end)
+{
+	while (start < end)
+		if (kvm_mem_is_private(kvm, start++))
+			return false;
+
+	return true;
+}
+
+void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level)
+{
+	int assigned;
+	int rmp_level = PG_LEVEL_4K;
+	int level_orig = *level;
+
+	if (!sev_snp_guest(kvm))
+		return;
+
+	/* If there's an error retrieving the RMP entry, stick with 4K mappings */
+	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
+	if (unlikely(assigned < 0))
+		goto out_adjust;
+
+	if (!assigned) {
+		gfn_t huge_gfn;
+
+		/*
+		 * If all the pages are shared then there is no need to keep
+		 * the RMP and NPT in sync.
+		 */
+		huge_gfn = gfn & ~(PTRS_PER_PMD - 1);
+		if (is_gfn_range_shared(kvm, huge_gfn, huge_gfn + PTRS_PER_PMD))
+			goto out;
+	}
+
+	/*
+	 * The hardware installs 2MB TLB entries to access 1GB pages,
+	 * therefore allow the NPT to use 1GB pages when the pfn was added
+	 * as 2MB in the RMP table.
+	 */
+	if (rmp_level == PG_LEVEL_2M && *level == PG_LEVEL_1G)
+		goto out;
+
+out_adjust:
+	/* Adjust the level to keep the NPT and RMP in sync */
+	*level = min_t(int, *level, rmp_level);
+out:
+	pr_debug("%s: GFN: 0x%llx, PFN: 0x%llx, level: %d, rmp_level: %d, level_orig: %d, assigned: %d\n",
+		 __func__, gfn, pfn, *level, rmp_level, level_orig, assigned);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1061aaf66f0a..9eb750c8b04c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4835,6 +4835,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
 	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
+
+	.adjust_mapping_level = sev_adjust_mapping_level,
 };
 
 /*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 740969b57425..cbd4594f1cca 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -706,6 +706,7 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
+void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level);
 
 /* vmenter.S */
-- 
2.25.1
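
For reference, the clamping rule that sev_adjust_mapping_level() enforces
can be summarized in a few lines of standalone C. This is a minimal sketch,
not kernel code: clamp_npt_level() is a hypothetical helper, and the
PG_LEVEL_* numbering mirrors the kernel's enum pg_level (4K=1, 2M=2, 1G=3).

#include <stdio.h>

enum { PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3 };

static int clamp_npt_level(int npt_level, int rmp_level)
{
	/*
	 * A 2MB RMP entry still permits a 1GB NPT mapping, since the
	 * hardware installs 2MB TLB entries for that combination.
	 */
	if (rmp_level == PG_LEVEL_2M && npt_level == PG_LEVEL_1G)
		return npt_level;

	/* Otherwise keep the NPT level at or below the RMP level. */
	return npt_level < rmp_level ? npt_level : rmp_level;
}

int main(void)
{
	printf("%d\n", clamp_npt_level(PG_LEVEL_1G, PG_LEVEL_2M)); /* 3: kept at 1G */
	printf("%d\n", clamp_npt_level(PG_LEVEL_2M, PG_LEVEL_4K)); /* 1: demoted to 4K */
	return 0;
}

The 2MB-RMP/1GB-NPT combination is the only one deliberately left out of
sync; every other mismatch is clamped to the smaller of the two levels,
which is what the min_t() at the out_adjust label does.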