[PATCH Part1 v5 28/38] x86/compressed/64: enable SEV-SNP-validated CPUID in #VC handler

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Michael Roth <michael.roth@xxxxxxx>

CPUID instructions generate a #VC exception for SEV-ES/SEV-SNP guests,
for which early handlers are currently set up to handle. In the case
of SEV-SNP, guests can use a special location in guest memory address
space that has been pre-populated with firmware-validated CPUID
information to look up the relevant CPUID values rather than
requesting them from hypervisor via a VMGEXIT.

Determine the location of the CPUID memory address in advance of any
CPUID instructions/exceptions and, when available, use it to handle
the CPUID lookup.

Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---
 arch/x86/boot/compressed/efi.c     |   1 +
 arch/x86/boot/compressed/head_64.S |   1 +
 arch/x86/boot/compressed/idt_64.c  |   7 +-
 arch/x86/boot/compressed/misc.h    |   1 +
 arch/x86/boot/compressed/sev.c     |   3 +
 arch/x86/include/asm/sev-common.h  |   2 +
 arch/x86/include/asm/sev.h         |   3 +
 arch/x86/kernel/sev-shared.c       | 374 +++++++++++++++++++++++++++++
 arch/x86/kernel/sev.c              |   4 +
 9 files changed, 394 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c
index 16ff5cb9a1fb..a1529a230ea7 100644
--- a/arch/x86/boot/compressed/efi.c
+++ b/arch/x86/boot/compressed/efi.c
@@ -176,3 +176,4 @@ efi_get_conf_table(struct boot_params *boot_params,
 
 	return 0;
 }
+
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index a2347ded77ea..1c1658693fc9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -441,6 +441,7 @@ SYM_CODE_START(startup_64)
 .Lon_kernel_cs:
 
 	pushq	%rsi
+	movq	%rsi, %rdi		/* real mode address */
 	call	load_stage1_idt
 	popq	%rsi
 
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
index 9b93567d663a..1f6511a6625d 100644
--- a/arch/x86/boot/compressed/idt_64.c
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -3,6 +3,7 @@
 #include <asm/segment.h>
 #include <asm/trapnr.h>
 #include "misc.h"
+#include <asm/sev.h>
 
 static void set_idt_entry(int vector, void (*handler)(void))
 {
@@ -28,13 +29,15 @@ static void load_boot_idt(const struct desc_ptr *dtr)
 }
 
 /* Setup IDT before kernel jumping to  .Lrelocated */
-void load_stage1_idt(void)
+void load_stage1_idt(void *rmode)
 {
 	boot_idt_desc.address = (unsigned long)boot_idt;
 
 
-	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+		sev_snp_cpuid_init(rmode);
 		set_idt_entry(X86_TRAP_VC, boot_stage1_vc);
+	}
 
 	load_boot_idt(&boot_idt_desc);
 }
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 16b092fd7aa1..cdd328aa42c2 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -190,6 +190,7 @@ int efi_get_conf_table(struct boot_params *boot_params,
 		       unsigned long *conf_table_pa,
 		       unsigned int *conf_table_len,
 		       bool *is_efi_64);
+
 #else
 static inline int
 efi_find_vendor_table(unsigned long conf_table_pa, unsigned int conf_table_len,
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 6e8d97c280aa..910bf5cf010e 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -20,6 +20,9 @@
 #include <asm/fpu/xcr.h>
 #include <asm/ptrace.h>
 #include <asm/svm.h>
+#include <asm/cpuid.h>
+#include <linux/efi.h>
+#include <linux/log2.h>
 
 #include "error.h"
 
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 072540dfb129..5f134c172dbf 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -148,6 +148,8 @@ struct snp_psc_desc {
 #define GHCB_TERM_PSC			1	/* Page State Change failure */
 #define GHCB_TERM_PVALIDATE		2	/* Pvalidate failure */
 #define GHCB_TERM_NOT_VMPL0		3	/* SNP guest is not running at VMPL-0 */
+#define GHCB_TERM_CPUID			4	/* CPUID-validation failure */
+#define GHCB_TERM_CPUID_HV		5	/* CPUID failure during hypervisor fallback */
 
 #define GHCB_RESP_CODE(v)		((v) & GHCB_MSR_INFO_MASK)
 
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 534fa1c4c881..c73931548346 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <asm/insn.h>
 #include <asm/sev-common.h>
+#include <asm/bootparam.h>
 
 #define GHCB_PROTOCOL_MIN	1ULL
 #define GHCB_PROTOCOL_MAX	2ULL
@@ -126,6 +127,7 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op
 void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
 void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
 void snp_set_wakeup_secondary_cpu(void);
+void sev_snp_cpuid_init(struct boot_params *bp);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
@@ -141,6 +143,7 @@ static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz,
 static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
 static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
 static inline void snp_set_wakeup_secondary_cpu(void) { }
+static inline void sev_snp_cpuid_init(struct boot_params *bp) { }
 #endif
 
 #endif
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index ae4556925485..651980ddbd65 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -14,6 +14,25 @@
 #define has_cpuflag(f)	boot_cpu_has(f)
 #endif
 
+struct sev_snp_cpuid_fn {
+	u32 eax_in;
+	u32 ecx_in;
+	u64 unused;
+	u64 unused2;
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 edx;
+	u64 reserved;
+} __packed;
+
+struct sev_snp_cpuid_info {
+	u32 count;
+	u32 reserved1;
+	u64 reserved2;
+	struct sev_snp_cpuid_fn fn[0];
+} __packed;
+
 /*
  * Since feature negotiation related variables are set early in the boot
  * process they must reside in the .data section so as not to be zeroed
@@ -26,6 +45,15 @@ static u16 __ro_after_init ghcb_version;
 /* Bitmap of SEV features supported by the hypervisor */
 u64 __ro_after_init sev_hv_features = 0;
 
+/*
+ * These are also stored in .data section to avoid the need to re-parse
+ * boot_params and re-determine CPUID memory range when .bss is cleared.
+ */
+static int sev_snp_cpuid_enabled __section(".data");
+static unsigned long sev_snp_cpuid_pa __section(".data");
+static unsigned long sev_snp_cpuid_sz __section(".data");
+static const struct sev_snp_cpuid_info *cpuid_info __section(".data");
+
 static bool __init sev_es_check_cpu_features(void)
 {
 	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
@@ -236,6 +264,219 @@ static int sev_cpuid_hv(u32 func, u32 subfunc, u32 *eax, u32 *ebx,
 	return 0;
 }
 
+static bool sev_snp_cpuid_active(void)
+{
+	return sev_snp_cpuid_enabled;
+}
+
+static int sev_snp_cpuid_xsave_size(u64 xfeatures_en, u32 base_size,
+				    u32 *xsave_size, bool compacted)
+{
+	u64 xfeatures_found = 0;
+	int i;
+
+	*xsave_size = base_size;
+
+	for (i = 0; i < cpuid_info->count; i++) {
+		const struct sev_snp_cpuid_fn *fn = &cpuid_info->fn[i];
+
+		if (!(fn->eax_in == 0xd && fn->ecx_in > 1 && fn->ecx_in < 64))
+			continue;
+		if (!(xfeatures_en & (1UL << fn->ecx_in)))
+			continue;
+		if (xfeatures_found & (1UL << fn->ecx_in))
+			continue;
+
+		xfeatures_found |= (1UL << fn->ecx_in);
+		if (compacted)
+			*xsave_size += fn->eax;
+		else
+			*xsave_size = max(*xsave_size, fn->eax + fn->ebx);
+	}
+
+	/*
+	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
+	 * entries in the CPUID table were not present. This is not a valid
+	 * state to be in.
+	 */
+	if (xfeatures_found != (xfeatures_en & ~3ULL))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void sev_snp_cpuid_hv(u32 func, u32 subfunc, u32 *eax, u32 *ebx,
+			     u32 *ecx, u32 *edx)
+{
+	/*
+	 * Currently MSR protocol is sufficient to handle fallback cases, but
+	 * should that change make sure we terminate rather than grabbing random
+	 * values. Handling can be added in future to use GHCB-page protocol for
+	 * cases that occur late enough in boot that GHCB page is available
+	 */
+	if (cpuid_function_is_indexed(func) && subfunc != 0)
+		sev_es_terminate(1, GHCB_TERM_CPUID_HV);
+
+	if (sev_cpuid_hv(func, 0, eax, ebx, ecx, edx))
+		sev_es_terminate(1, GHCB_TERM_CPUID_HV);
+}
+
+static bool sev_snp_cpuid_find(u32 func, u32 subfunc, u32 *eax, u32 *ebx,
+			       u32 *ecx, u32 *edx)
+{
+	int i;
+	bool found = false;
+
+	for (i = 0; i < cpuid_info->count; i++) {
+		const struct sev_snp_cpuid_fn *fn = &cpuid_info->fn[i];
+
+		if (fn->eax_in != func)
+			continue;
+
+		if (cpuid_function_is_indexed(func) && fn->ecx_in != subfunc)
+			continue;
+
+		*eax = fn->eax;
+		*ebx = fn->ebx;
+		*ecx = fn->ecx;
+		*edx = fn->edx;
+		found = true;
+
+		break;
+	}
+
+	return found;
+}
+
+static bool sev_snp_cpuid_in_range(u32 func)
+{
+	int i;
+	u32 std_range_min = 0;
+	u32 std_range_max = 0;
+	u32 hyp_range_min = 0x40000000;
+	u32 hyp_range_max = 0;
+	u32 ext_range_min = 0x80000000;
+	u32 ext_range_max = 0;
+
+	for (i = 0; i < cpuid_info->count; i++) {
+		const struct sev_snp_cpuid_fn *fn = &cpuid_info->fn[i];
+
+		if (fn->eax_in == std_range_min)
+			std_range_max = fn->eax;
+		else if (fn->eax_in == hyp_range_min)
+			hyp_range_max = fn->eax;
+		else if (fn->eax_in == ext_range_min)
+			ext_range_max = fn->eax;
+	}
+
+	if ((func >= std_range_min && func <= std_range_max) ||
+	    (func >= hyp_range_min && func <= hyp_range_max) ||
+	    (func >= ext_range_min && func <= ext_range_max))
+		return true;
+
+	return false;
+}
+
+/*
+ * Returns -EOPNOTSUPP if feature not enabled. Any other return value should be
+ * treated as fatal by caller since we cannot fall back to hypervisor to fetch
+ * the values for security reasons (outside of the specific cases handled here)
+ */
+static int sev_snp_cpuid(u32 func, u32 subfunc, u32 *eax, u32 *ebx, u32 *ecx,
+			 u32 *edx)
+{
+	if (!sev_snp_cpuid_active())
+		return -EOPNOTSUPP;
+
+	if (!cpuid_info)
+		return -EIO;
+
+	if (!sev_snp_cpuid_find(func, subfunc, eax, ebx, ecx, edx)) {
+		/*
+		 * Some hypervisors will avoid keeping track of CPUID entries
+		 * where all values are zero, since they can be handled the
+		 * same as out-of-range values (all-zero). In our case, we want
+		 * to be able to distinguish between out-of-range entries and
+		 * in-range zero entries, since the CPUID table entries are
+		 * only a template that may need to be augmented with
+		 * additional values for things like CPU-specific information.
+		 * So if it's not in the table, but is still in the valid
+		 * range, proceed with the fix-ups below. Otherwise, just return
+		 * zeros.
+		 */
+		*eax = *ebx = *ecx = *edx = 0;
+		if (!sev_snp_cpuid_in_range(func))
+			goto out;
+	}
+
+	if (func == 0x1) {
+		u32 ebx2, edx2;
+
+		sev_snp_cpuid_hv(func, subfunc, NULL, &ebx2, NULL, &edx2);
+		/* initial APIC ID */
+		*ebx = (*ebx & 0x00FFFFFF) | (ebx2 & 0xFF000000);
+		/* APIC enabled bit */
+		*edx = (*edx & ~BIT_ULL(9)) | (edx2 & BIT_ULL(9));
+
+		/* OSXSAVE enabled bit */
+		if (native_read_cr4() & X86_CR4_OSXSAVE)
+			*ecx |= BIT_ULL(27);
+	} else if (func == 0x7) {
+		/* OSPKE enabled bit */
+		*ecx &= ~BIT_ULL(4);
+		if (native_read_cr4() & X86_CR4_PKE)
+			*ecx |= BIT_ULL(4);
+	} else if (func == 0xB) {
+		/* extended APIC ID */
+		sev_snp_cpuid_hv(func, 0, NULL, NULL, NULL, edx);
+	} else if (func == 0xd && (subfunc == 0x0 || subfunc == 0x1)) {
+		bool compacted = false;
+		u64 xcr0 = 1, xss = 0;
+		u32 xsave_size;
+
+		if (native_read_cr4() & X86_CR4_OSXSAVE)
+			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+		if (subfunc == 1) {
+			/* boot/compressed doesn't set XSS so 0 is fine there */
+#ifndef __BOOT_COMPRESSED
+			if (*eax & 0x8) /* XSAVES */
+				if (boot_cpu_has(X86_FEATURE_XSAVES))
+					rdmsrl(MSR_IA32_XSS, xss);
+#endif
+			/*
+			 * The PPR and APM aren't clear on what size should be
+			 * encoded in 0xD:0x1:EBX when compaction is not enabled
+			 * by either XSAVEC or XSAVES since SNP-capable hardware
+			 * has the entries fixed as 1. KVM sets it to 0 in this
+			 * case, but to avoid this becoming an issue it's safer
+			 * to simply treat this as unsupported or SNP guests.
+			 */
+			if (!(*eax & 0xA)) /* (XSAVEC|XSAVES) */
+				return -EINVAL;
+
+			compacted = true;
+		}
+
+		if (sev_snp_cpuid_xsave_size(xcr0 | xss, *ebx, &xsave_size,
+					     compacted))
+			return -EINVAL;
+
+		*ebx = xsave_size;
+	} else if (func == 0x8000001E) {
+		u32 ebx2, ecx2;
+
+		/* extended APIC ID */
+		sev_snp_cpuid_hv(func, subfunc, eax, &ebx2, &ecx2, NULL);
+		/* compute ID */
+		*ebx = (*ebx & 0xFFFFFFF00) | (ebx2 & 0x000000FF);
+		/* node ID */
+		*ecx = (*ecx & 0xFFFFFFF00) | (ecx2 & 0x000000FF);
+	}
+
+out:
+	return 0;
+}
+
 /*
  * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
  * page yet, so it only supports the MSR based communication with the
@@ -244,15 +485,25 @@ static int sev_cpuid_hv(u32 func, u32 subfunc, u32 *eax, u32 *ebx,
 void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 {
 	unsigned int fn = lower_bits(regs->ax, 32);
+	unsigned int subfn = lower_bits(regs->cx, 32);
 	u32 eax, ebx, ecx, edx;
+	int ret;
 
 	/* Only CPUID is supported via MSR protocol */
 	if (exit_code != SVM_EXIT_CPUID)
 		goto fail;
 
+	ret = sev_snp_cpuid(fn, subfn, &eax, &ebx, &ecx, &edx);
+	if (ret == 0)
+		goto out;
+
+	if (ret != -EOPNOTSUPP)
+		goto fail;
+
 	if (sev_cpuid_hv(fn, 0, &eax, &ebx, &ecx, &edx))
 		goto fail;
 
+out:
 	regs->ax = eax;
 	regs->bx = ebx;
 	regs->cx = ecx;
@@ -552,6 +803,19 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
 	struct pt_regs *regs = ctxt->regs;
 	u32 cr4 = native_read_cr4();
 	enum es_result ret;
+	u32 eax, ebx, ecx, edx;
+	int cpuid_ret;
+
+	cpuid_ret = sev_snp_cpuid(regs->ax, regs->cx, &eax, &ebx, &ecx, &edx);
+	if (cpuid_ret == 0) {
+		regs->ax = eax;
+		regs->bx = ebx;
+		regs->cx = ecx;
+		regs->dx = edx;
+		return ES_OK;
+	}
+	if (cpuid_ret != -EOPNOTSUPP)
+		return ES_VMM_ERROR;
 
 	ghcb_set_rax(ghcb, regs->ax);
 	ghcb_set_rcx(ghcb, regs->cx);
@@ -603,3 +867,113 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
 
 	return ES_OK;
 }
+
+#ifdef BOOT_COMPRESSED
+static struct setup_data *get_cc_setup_data(struct boot_params *bp)
+{
+	struct setup_data *hdr = (struct setup_data *)bp->hdr.setup_data;
+
+	while (hdr) {
+		if (hdr->type == SETUP_CC_BLOB)
+			return hdr;
+		hdr = (struct setup_data *)hdr->next;
+	}
+
+	return NULL;
+}
+
+/*
+ * For boot/compressed kernel:
+ *
+ *   1) Search for CC blob in the following order/precedence:
+ *      - via linux boot protocol / setup_data entry
+ *      - via EFI configuration table
+ *   2) Return a pointer to the CC blob, NULL otherwise.
+ */
+static struct cc_blob_sev_info *sev_snp_probe_cc_blob(struct boot_params *bp)
+{
+	struct cc_blob_sev_info *cc_info = NULL;
+	struct setup_data_cc {
+		struct setup_data header;
+		u32 cc_blob_address;
+	} *sd;
+	unsigned long conf_table_pa;
+	unsigned int conf_table_len;
+	bool efi_64;
+
+	/* Try to get CC blob via setup_data */
+	sd = (struct setup_data_cc *)get_cc_setup_data(bp);
+	if (sd) {
+		cc_info = (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
+		goto out_verify;
+	}
+
+	/* CC blob isn't in setup_data, see if it's in the EFI config table */
+	if (!efi_get_conf_table(bp, &conf_table_pa, &conf_table_len, &efi_64))
+		(void)efi_find_vendor_table(conf_table_pa, conf_table_len,
+					    EFI_CC_BLOB_GUID, efi_64,
+					    (unsigned long *)&cc_info);
+
+out_verify:
+	/* CC blob should be either valid or not present. Fail otherwise. */
+	if (cc_info && cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+		sev_es_terminate(1, GHCB_SNP_UNSUPPORTED);
+
+	return cc_info;
+}
+#else
+/*
+ * Probing for CC blob for run-time kernel will be enabled in a subsequent
+ * patch. For now we need to stub this out.
+ */
+static struct cc_blob_sev_info *sev_snp_probe_cc_blob(struct boot_params *bp)
+{
+	return NULL;
+}
+#endif
+
+/*
+ * Initial set up of CPUID table when running identity-mapped.
+ *
+ * NOTE: Since SEV_SNP feature partly relies on CPUID checks that can't
+ * happen until we access CPUID page, we skip the check and hope the
+ * bootloader is providing sane values. Current code relies on all CPUID
+ * page lookups originating from #VC handler, which at least provides
+ * indication that SEV-ES is enabled. Subsequent init levels will check for
+ * SEV_SNP feature once available to also take SEV MSR value into account.
+ */
+void sev_snp_cpuid_init(struct boot_params *bp)
+{
+	struct cc_blob_sev_info *cc_info;
+
+	if (!bp)
+		sev_es_terminate(1, GHCB_TERM_CPUID);
+
+	cc_info = sev_snp_probe_cc_blob(bp);
+
+	if (!cc_info)
+		return;
+
+	sev_snp_cpuid_pa = cc_info->cpuid_phys;
+	sev_snp_cpuid_sz = cc_info->cpuid_len;
+
+	/*
+	 * These should always be valid values for SNP, even if guest isn't
+	 * actually configured to use the CPUID table.
+	 */
+	if (!sev_snp_cpuid_pa || sev_snp_cpuid_sz < PAGE_SIZE)
+		sev_es_terminate(1, GHCB_TERM_CPUID);
+
+	cpuid_info = (const struct sev_snp_cpuid_info *)sev_snp_cpuid_pa;
+
+	/*
+	 * We should be able to trust the 'count' value in the CPUID table
+	 * area, but ensure it agrees with CC blob value to be safe.
+	 */
+	if (sev_snp_cpuid_sz < (sizeof(struct sev_snp_cpuid_info) +
+				sizeof(struct sev_snp_cpuid_fn) *
+				cpuid_info->count))
+		sev_es_terminate(1, GHCB_TERM_CPUID);
+
+	sev_snp_cpuid_enabled = 1;
+}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index ddf8ced4a879..d7b6f7420551 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -19,6 +19,8 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/cpumask.h>
+#include <linux/log2.h>
+#include <linux/efi.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
@@ -32,6 +34,8 @@
 #include <asm/smp.h>
 #include <asm/cpu.h>
 #include <asm/apic.h>
+#include <asm/efi.h>
+#include <asm/cpuid.h>
 
 #include "sev-internal.h"
 
-- 
2.17.1




[Index of Archives]     [Linux Kernel Development]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux