Implement the SNP Run VMPL NAE event and MSR protocol to allow a guest to
request that a different VMPL level VMSA be run for the vCPU. This allows
the guest to "call" an SVSM to process an SVSM request.

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
---
 arch/x86/include/asm/sev-common.h |   6 ++
 arch/x86/kvm/svm/sev.c            | 126 +++++++++++++++++++++++++++++-
 arch/x86/kvm/svm/svm.c            |  13 +++
 arch/x86/kvm/svm/svm.h            |  18 ++++-
 4 files changed, 158 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index d63c861ef91f..6f7134aada83 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -114,6 +114,8 @@ enum psc_op {
 /* GHCB Run at VMPL Request/Response */
 #define GHCB_MSR_VMPL_REQ		0x016
+#define GHCB_MSR_VMPL_LEVEL_POS		32
+#define GHCB_MSR_VMPL_LEVEL_MASK	GENMASK_ULL(7, 0)
 #define GHCB_MSR_VMPL_REQ_LEVEL(v)	\
 	/* GHCBData[39:32] */		\
 	(((u64)(v) & GENMASK_ULL(7, 0) << 32) |	\
@@ -121,6 +123,10 @@ enum psc_op {
 	GHCB_MSR_VMPL_REQ)
 
 #define GHCB_MSR_VMPL_RESP		0x017
+#define GHCB_MSR_VMPL_ERROR_POS		32
+#define GHCB_MSR_VMPL_ERROR_MASK	GENMASK_ULL(31, 0)
+#define GHCB_MSR_VMPL_RSVD_POS		12
+#define GHCB_MSR_VMPL_RSVD_MASK		GENMASK_ULL(19, 0)
 #define GHCB_MSR_VMPL_RESP_VAL(v)	\
 	/* GHCBData[63:32] */		\
 	(((u64)(v) & GENMASK_ULL(63, 32)) >> 32)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c22b6f51ec81..e0f5122061e6 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3421,6 +3421,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 		    control->exit_info_1 == control->exit_info_2)
 			goto vmgexit_err;
 		break;
+	case SVM_VMGEXIT_SNP_RUN_VMPL:
+		if (!sev_snp_guest(vcpu->kvm))
+			goto vmgexit_err;
+		break;
 	default:
 		reason = GHCB_ERR_INVALID_EVENT;
 		goto vmgexit_err;
@@ -3935,21 +3939,25 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Invoked as part of svm_vcpu_reset() processing of an init event.
+ * Invoked as part of svm_vcpu_reset() processing of an init event
+ * or as part of switching to a new VMPL.
  */
-void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
+bool sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	bool init = false;
 	int ret;
 
 	if (!sev_snp_guest(vcpu->kvm))
-		return;
+		return false;
 
 	mutex_lock(&svm->sev_es.snp_vmsa_mutex);
 
 	if (!tgt_vmpl_ap_waiting_for_reset(svm))
 		goto unlock;
 
+	init = true;
+
 	tgt_vmpl_ap_waiting_for_reset(svm) = false;
 
 	ret = __sev_snp_update_protected_guest_state(vcpu);
@@ -3958,6 +3966,8 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
 unlock:
 	mutex_unlock(&svm->sev_es.snp_vmsa_mutex);
+
+	return init;
 }
 
 static int sev_snp_ap_creation(struct vcpu_svm *svm)
@@ -4255,6 +4265,92 @@ static void sev_get_apic_ids(struct vcpu_svm *svm)
 	kvfree(desc);
 }
 
+static int __sev_run_vmpl_vmsa(struct vcpu_svm *svm, unsigned int new_vmpl)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct vmpl_switch_sa *old_vmpl_sa;
+	struct vmpl_switch_sa *new_vmpl_sa;
+	unsigned int old_vmpl;
+
+	if (new_vmpl >= SVM_SEV_VMPL_MAX)
+		return -EINVAL;
+	new_vmpl = array_index_nospec(new_vmpl, SVM_SEV_VMPL_MAX);
+
+	old_vmpl = svm->sev_es.snp_current_vmpl;
+	svm->sev_es.snp_target_vmpl = new_vmpl;
+
+	if (svm->sev_es.snp_target_vmpl == svm->sev_es.snp_current_vmpl ||
+	    sev_snp_init_protected_guest_state(vcpu))
+		return 0;
+
+	/* If the VMSA is not valid, return an error */
+	if (!VALID_PAGE(vmpl_vmsa_hpa(svm, new_vmpl)))
+		return -EINVAL;
+
+	/* Unmap the current GHCB */
+	sev_es_unmap_ghcb(svm);
+
+	/* Save some current VMCB values */
+	svm->sev_es.ghcb_gpa[old_vmpl] = svm->vmcb->control.ghcb_gpa;
+
+	old_vmpl_sa = &svm->sev_es.vssa[old_vmpl];
+	old_vmpl_sa->int_state = svm->vmcb->control.int_state;
+	old_vmpl_sa->exit_int_info = svm->vmcb->control.exit_int_info;
+	old_vmpl_sa->exit_int_info_err = svm->vmcb->control.exit_int_info_err;
+	old_vmpl_sa->cr0 = vcpu->arch.cr0;
+	old_vmpl_sa->cr2 = vcpu->arch.cr2;
+	old_vmpl_sa->cr4 = vcpu->arch.cr4;
+	old_vmpl_sa->cr8 = vcpu->arch.cr8;
+	old_vmpl_sa->efer = vcpu->arch.efer;
+
+	/* Restore some previous VMCB values */
+	svm->vmcb->control.vmsa_pa = vmpl_vmsa_hpa(svm, new_vmpl);
+	svm->vmcb->control.ghcb_gpa = svm->sev_es.ghcb_gpa[new_vmpl];
+
+	new_vmpl_sa = &svm->sev_es.vssa[new_vmpl];
+	svm->vmcb->control.int_state = new_vmpl_sa->int_state;
+	svm->vmcb->control.exit_int_info = new_vmpl_sa->exit_int_info;
+	svm->vmcb->control.exit_int_info_err = new_vmpl_sa->exit_int_info_err;
+	vcpu->arch.cr0 = new_vmpl_sa->cr0;
+	vcpu->arch.cr2 = new_vmpl_sa->cr2;
+	vcpu->arch.cr4 = new_vmpl_sa->cr4;
+	vcpu->arch.cr8 = new_vmpl_sa->cr8;
+	vcpu->arch.efer = new_vmpl_sa->efer;
+
+	svm->sev_es.snp_current_vmpl = new_vmpl;
+
+	vmcb_mark_all_dirty(svm->vmcb);
+
+	return 0;
+}
+
+static void sev_run_vmpl_vmsa(struct vcpu_svm *svm)
+{
+	struct ghcb *ghcb = svm->sev_es.ghcb;
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	unsigned int vmpl;
+	int ret;
+
+	/* TODO: Does this need to be synced for original VMPL ... */
+	ghcb_set_sw_exit_info_1(ghcb, 0);
+	ghcb_set_sw_exit_info_2(ghcb, 0);
+
+	if (!sev_snp_guest(vcpu->kvm))
+		goto err;
+
+	vmpl = lower_32_bits(svm->vmcb->control.exit_info_1);
+
+	ret = __sev_run_vmpl_vmsa(svm, vmpl);
+	if (ret)
+		goto err;
+
+	return;
+
+err:
+	ghcb_set_sw_exit_info_1(ghcb, 2);
+	ghcb_set_sw_exit_info_2(ghcb, 0);
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -4366,6 +4462,25 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 		ret = snp_begin_psc_msr(svm, control->ghcb_gpa);
 		break;
+	case GHCB_MSR_VMPL_REQ: {
+		unsigned int vmpl;
+
+		vmpl = get_ghcb_msr_bits(svm, GHCB_MSR_VMPL_LEVEL_MASK, GHCB_MSR_VMPL_LEVEL_POS);
+
+		/*
+		 * Set as successful in advance, since this value will be saved
+		 * as part of the VMPL switch and then restored if switching
+		 * back to the calling VMPL level.
+		 */
+		set_ghcb_msr_bits(svm, 0, GHCB_MSR_VMPL_ERROR_MASK, GHCB_MSR_VMPL_ERROR_POS);
+		set_ghcb_msr_bits(svm, 0, GHCB_MSR_VMPL_RSVD_MASK, GHCB_MSR_VMPL_RSVD_POS);
+		set_ghcb_msr_bits(svm, GHCB_MSR_VMPL_RESP, GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
+
+		if (__sev_run_vmpl_vmsa(svm, vmpl))
+			set_ghcb_msr_bits(svm, 1, GHCB_MSR_VMPL_ERROR_MASK, GHCB_MSR_VMPL_ERROR_POS);
+
+		break;
+	}
 	case GHCB_MSR_TERM_REQ: {
 		u64 reason_set, reason_code;
@@ -4538,6 +4653,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 	case SVM_VMGEXIT_GET_APIC_IDS:
 		sev_get_apic_ids(svm);
 
+		ret = 1;
+		break;
+	case SVM_VMGEXIT_SNP_RUN_VMPL:
+		sev_run_vmpl_vmsa(svm);
+
 		ret = 1;
 		break;
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ca4bc53fb14a..586c26627bb1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4253,6 +4253,19 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
 	}
 	vcpu->arch.regs_dirty = 0;
 
+	if (sev_snp_is_rinj_active(vcpu)) {
+		/*
+		 * When SEV-SNP is running with restricted injection, the V_IRQ
+		 * bit may be cleared on exit because virtual interrupt support
+		 * is ignored. To support multiple VMPLs, some of which may not
+		 * be running with restricted injection, ensure to reset the
+		 * V_IRQ bit if a virtual interrupt is meant to be active (the
+		 * virtual interrupt priority mask is non-zero).
+		 */
+		if (svm->vmcb->control.int_ctl & V_INTR_PRIO_MASK)
+			svm->vmcb->control.int_ctl |= V_IRQ_MASK;
+	}
+
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
 		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 45a37d16b6f7..d1ef349556f7 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -198,6 +198,18 @@ struct svm_nested_state {
 	bool force_msr_bitmap_recalc;
 };
 
+struct vmpl_switch_sa {
+	u32 int_state;
+	u32 exit_int_info;
+	u32 exit_int_info_err;
+
+	unsigned long cr0;
+	unsigned long cr2;
+	unsigned long cr4;
+	unsigned long cr8;
+	u64 efer;
+};
+
 #define vmpl_vmsa(s, v)		((s)->sev_es.vmsa_info[(v)].vmsa)
 #define vmpl_vmsa_gpa(s, v)	((s)->sev_es.vmsa_info[(v)].gpa)
 #define vmpl_vmsa_hpa(s, v)	((s)->sev_es.vmsa_info[(v)].hpa)
@@ -256,6 +268,8 @@ struct vcpu_sev_es_state {
 	struct mutex snp_vmsa_mutex; /* Used to handle concurrent updates of VMSA. */
 	unsigned int snp_current_vmpl;
 	unsigned int snp_target_vmpl;
+
+	struct vmpl_switch_sa vssa[SVM_SEV_VMPL_MAX];
 };
 
 struct vcpu_svm {
@@ -776,7 +790,7 @@ int sev_cpu_init(struct svm_cpu_data *sd);
 int sev_dev_get_attr(u32 group, u64 attr, u64 *val);
 extern unsigned int max_sev_asid;
 void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
-void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu);
+bool sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu);
 int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
 void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
 int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn);
@@ -800,7 +814,7 @@ static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; }
 static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; }
 #define max_sev_asid 0
 static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {}
-static inline void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) {}
+static inline bool sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) { return false; }
 static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
 {
 	return 0;
-- 
2.43.2
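
[Editor's note: for context on the guest side of the new MSR protocol, below is a
minimal sketch of how a guest might issue GHCB_MSR_VMPL_REQ and check the
GHCB_MSR_VMPL_RESP result. It is illustrative only and is not part of this
patch; the function name snp_request_run_vmpl0() is hypothetical, and the
helpers sev_es_wr_ghcb_msr(), sev_es_rd_ghcb_msr(), VMGEXIT() and
GHCB_RESP_CODE() are assumed to come from the existing guest SEV support
code rather than from this series.]

/*
 * Illustrative guest-side request to run the VMPL0 VMSA (e.g. to "call"
 * the SVSM) using the GHCB MSR protocol handled above. Hypothetical
 * sketch; helper names assumed from the existing guest SEV support.
 */
static int snp_request_run_vmpl0(void)
{
	u64 val;

	/* GHCBData[39:32] = target VMPL, GHCBData[11:0] = 0x016 */
	sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* Expect a Run-at-VMPL response (0x017) ... */
	if (GHCB_RESP_CODE(val) != GHCB_MSR_VMPL_RESP)
		return -EINVAL;

	/* ... with a zero error code in GHCBData[63:32] */
	if (GHCB_MSR_VMPL_RESP_VAL(val))
		return -EINVAL;

	return 0;
}

[Per the comment added in sev_handle_vmgexit_msr_protocol(), the success
response is written into the GHCB MSR value before the switch, so the
requesting VMPL sees a zero error code once it is eventually resumed; a
failed __sev_run_vmpl_vmsa() call overwrites the error field with 1.]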