[PATCH Part2 v5 39/45] KVM: SVM: Introduce ops for the post gfn map and unmap

Brijesh Singh <brijesh.singh@xxxxxxx> · Fri, 20 Aug 2021 10:59:12 -0500

When SEV-SNP is enabled in the guest VM, the guest memory pages can
either be a private or shared. A write from the hypervisor goes through
the RMP checks. If hardware sees that hypervisor is attempting to write
to a guest private page, then it triggers an RMP violation #PF.

To avoid the RMP violation, add post_{map,unmap}_gfn() ops that can be
used to verify that its safe to map a given guest page. Use the SRCU to
protect against the page state change for existing mapped pages.

Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---
 arch/x86/include/asm/kvm-x86-ops.h |  2 +
 arch/x86/include/asm/kvm_host.h    |  4 ++
 arch/x86/kvm/svm/sev.c             | 69 +++++++++++++++++++++-----
 arch/x86/kvm/svm/svm.c             |  4 ++
 arch/x86/kvm/svm/svm.h             |  8 +++
 arch/x86/kvm/x86.c                 | 78 +++++++++++++++++++++++++++---
 6 files changed, 146 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 371756c7f8f4..c09bd40e0160 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -124,6 +124,8 @@ KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP_NULL(complete_emulated_msr)
 KVM_X86_OP(alloc_apic_backing_page)
 KVM_X86_OP_NULL(rmp_page_level_adjust)
+KVM_X86_OP(post_map_gfn)
+KVM_X86_OP(post_unmap_gfn)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a6e764458f3e..5ac1ff097e8c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1463,7 +1463,11 @@ struct kvm_x86_ops {
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 
 	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+
 	void (*rmp_page_level_adjust)(struct kvm *kvm, kvm_pfn_t pfn, int *level);
+
+	int (*post_map_gfn)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
+	void (*post_unmap_gfn)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0de85ed63e9b..65b578463271 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -336,6 +336,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 		if (ret)
 			goto e_free;
 
+		init_srcu_struct(&sev->psc_srcu);
 		ret = sev_snp_init(&argp->error);
 	} else {
 		ret = sev_platform_init(&argp->error);
@@ -2293,6 +2294,7 @@ void sev_vm_destroy(struct kvm *kvm)
 			WARN_ONCE(1, "Failed to free SNP guest context, leaking asid!\n");
 			return;
 		}
+		cleanup_srcu_struct(&sev->psc_srcu);
 	} else {
 		sev_unbind_asid(kvm, sev->handle);
 	}
@@ -2494,23 +2496,32 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
 	kfree(svm->ghcb_sa);
 }
 
-static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map, int *token)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	u64 gfn = gpa_to_gfn(control->ghcb_gpa);
+	struct kvm_vcpu *vcpu = &svm->vcpu;
 
-	if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
+	if (kvm_vcpu_map(vcpu, gfn, map)) {
 		/* Unable to map GHCB from guest */
 		pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
 		return -EFAULT;
 	}
 
+	if (sev_post_map_gfn(vcpu->kvm, map->gfn, map->pfn, token)) {
+		kvm_vcpu_unmap(vcpu, map, false);
+		return -EBUSY;
+	}
+
 	return 0;
 }
 
-static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map, int token)
 {
-	kvm_vcpu_unmap(&svm->vcpu, map, true);
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	kvm_vcpu_unmap(vcpu, map, true);
+	sev_post_unmap_gfn(vcpu->kvm, map->gfn, map->pfn, token);
 }
 
 static void dump_ghcb(struct vcpu_svm *svm)
@@ -2518,8 +2529,9 @@ static void dump_ghcb(struct vcpu_svm *svm)
 	struct kvm_host_map map;
 	unsigned int nbits;
 	struct ghcb *ghcb;
+	int token;
 
-	if (svm_map_ghcb(svm, &map))
+	if (svm_map_ghcb(svm, &map, &token))
 		return;
 
 	ghcb = map.hva;
@@ -2544,7 +2556,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
 	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
 
 e_unmap:
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 }
 
 static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
@@ -2552,8 +2564,9 @@ static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	struct kvm_host_map map;
 	struct ghcb *ghcb;
+	int token;
 
-	if (svm_map_ghcb(svm, &map))
+	if (svm_map_ghcb(svm, &map, &token))
 		return false;
 
 	ghcb = map.hva;
@@ -2579,7 +2592,7 @@ static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 
 	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
 
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 
 	return true;
 }
@@ -2636,8 +2649,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	struct kvm_host_map map;
 	struct ghcb *ghcb;
+	int token;
 
-	if (svm_map_ghcb(svm, &map))
+	if (svm_map_ghcb(svm, &map, &token))
 		return -EFAULT;
 
 	ghcb = map.hva;
@@ -2739,7 +2753,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 
 	sev_es_sync_from_ghcb(svm, ghcb);
 
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 	return 0;
 
 vmgexit_err:
@@ -2760,7 +2774,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 	vcpu->run->internal.data[0] = *exit_code;
 	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
 
-	svm_unmap_ghcb(svm, &map);
+	svm_unmap_ghcb(svm, &map, token);
 	return -EINVAL;
 }
 
@@ -3036,6 +3050,9 @@ static int __snp_handle_page_state_change(struct kvm_vcpu *vcpu, enum psc_op op,
 				return PSC_UNDEF_ERR;
 		}
 
+		/* Wait for all the existing mapped gfn to unmap */
+		synchronize_srcu_expedited(&sev->psc_srcu);
+
 		write_lock(&kvm->mmu_lock);
 
 		rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level);
@@ -3604,3 +3621,33 @@ void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level)
 	/* Adjust the level to keep the NPT and RMP in sync */
 	*level = min_t(size_t, *level, rmp_level);
 }
+
+int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	int level;
+
+	if (!sev_snp_guest(kvm))
+		return 0;
+
+	*token = srcu_read_lock(&sev->psc_srcu);
+
+	/* If pfn is not added as private then fail */
+	if (snp_lookup_rmpentry(pfn, &level) == 1) {
+		srcu_read_unlock(&sev->psc_srcu, *token);
+		pr_err_ratelimited("failed to map private gfn 0x%llx pfn 0x%llx\n", gfn, pfn);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+	if (!sev_snp_guest(kvm))
+		return;
+
+	srcu_read_unlock(&sev->psc_srcu, token);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5f73f21a37a1..3784d389247b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4679,7 +4679,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 
 	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
+
 	.rmp_page_level_adjust = sev_rmp_page_level_adjust,
+
+	.post_map_gfn = sev_post_map_gfn,
+	.post_unmap_gfn = sev_post_unmap_gfn,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index d10f7166b39d..ff91184f9b4a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -76,16 +76,22 @@ struct kvm_sev_info {
 	bool active;		/* SEV enabled guest */
 	bool es_active;		/* SEV-ES enabled guest */
 	bool snp_active;	/* SEV-SNP enabled guest */
+
 	unsigned int asid;	/* ASID used for this guest */
 	unsigned int handle;	/* SEV firmware handle */
 	int fd;			/* SEV device fd */
+
 	unsigned long pages_locked; /* Number of pages locked */
 	struct list_head regions_list;  /* List of registered regions */
+
 	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
+
 	struct kvm *enc_context_owner; /* Owner of copied encryption context */
 	struct misc_cg *misc_cg; /* For misc cgroup accounting */
+
 	u64 snp_init_flags;
 	void *snp_context;      /* SNP guest context page */
+	struct srcu_struct psc_srcu;
 };
 
 struct kvm_svm {
@@ -618,6 +624,8 @@ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
 void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level);
+int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
+void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
 
 /* vmenter.S */
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index afcdc75a99f2..bf4389ffc88f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3095,6 +3095,65 @@ static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.apf.msr_en_val & mask) == mask;
 }
 
+static int kvm_map_gfn_protected(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
+				 struct gfn_to_pfn_cache *cache, bool atomic, int *token)
+{
+	int ret;
+
+	ret = kvm_map_gfn(vcpu, gfn, map, cache, atomic);
+	if (ret)
+		return ret;
+
+	if (kvm_x86_ops.post_map_gfn) {
+		ret = static_call(kvm_x86_post_map_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+		if (ret)
+			kvm_unmap_gfn(vcpu, map, cache, false, atomic);
+	}
+
+	return ret;
+}
+
+static int kvm_unmap_gfn_protected(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+				   struct gfn_to_pfn_cache *cache, bool dirty,
+				   bool atomic, int token)
+{
+	int ret;
+
+	ret = kvm_unmap_gfn(vcpu, map, cache, dirty, atomic);
+
+	if (kvm_x86_ops.post_unmap_gfn)
+		static_call(kvm_x86_post_unmap_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+
+	return ret;
+}
+
+static int kvm_vcpu_map_protected(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
+				  int *token)
+{
+	int ret;
+
+	ret = kvm_vcpu_map(vcpu, gpa, map);
+	if (ret)
+		return ret;
+
+	if (kvm_x86_ops.post_map_gfn) {
+		ret = static_call(kvm_x86_post_map_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+		if (ret)
+			kvm_vcpu_unmap(vcpu, map, false);
+	}
+
+	return ret;
+}
+
+static void kvm_vcpu_unmap_protected(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+				     bool dirty, int token)
+{
+	kvm_vcpu_unmap(vcpu, map, dirty);
+
+	if (kvm_x86_ops.post_unmap_gfn)
+		static_call(kvm_x86_post_unmap_gfn)(vcpu->kvm, map->gfn, map->pfn, token);
+}
+
 static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
@@ -3185,6 +3244,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 {
 	struct kvm_host_map map;
 	struct kvm_steal_time *st;
+	int token;
 
 	if (kvm_xen_msr_enabled(vcpu->kvm)) {
 		kvm_xen_runstate_set_running(vcpu);
@@ -3195,8 +3255,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 		return;
 
 	/* -EAGAIN is returned in atomic context so we can just return. */
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
-			&map, &vcpu->arch.st.cache, false))
+	if (kvm_map_gfn_protected(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+				  &map, &vcpu->arch.st.cache, false, &token))
 		return;
 
 	st = map.hva +
@@ -3234,7 +3294,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 
 	st->version += 1;
 
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+	kvm_unmap_gfn_protected(vcpu, &map, &vcpu->arch.st.cache, true, false, token);
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -4271,6 +4331,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
 	struct kvm_host_map map;
 	struct kvm_steal_time *st;
+	int token;
 
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
@@ -4278,8 +4339,8 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.st.preempted)
 		return;
 
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
-			&vcpu->arch.st.cache, true))
+	if (kvm_map_gfn_protected(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+				  &map, &vcpu->arch.st.cache, true, &token))
 		return;
 
 	st = map.hva +
@@ -4287,7 +4348,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 
 	st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
 
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+	kvm_unmap_gfn_protected(vcpu, &map, &vcpu->arch.st.cache, true, true, token);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -6816,6 +6877,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	gpa_t gpa;
 	char *kaddr;
 	bool exchanged;
+	int token;
 
 	/* guests cmpxchg8b have to be emulated atomically */
 	if (bytes > 8 || (bytes & (bytes - 1)))
@@ -6839,7 +6901,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
 		goto emul_write;
 
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
+	if (kvm_vcpu_map_protected(vcpu, gpa_to_gfn(gpa), &map, &token))
 		goto emul_write;
 
 	kaddr = map.hva + offset_in_page(gpa);
@@ -6861,7 +6923,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 		BUG();
 	}
 
-	kvm_vcpu_unmap(vcpu, &map, true);
+	kvm_vcpu_unmap_protected(vcpu, &map, true, token);
 
 	if (!exchanged)
 		return X86EMUL_CMPXCHG_FAILED;
-- 
2.17.1