#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled __ro_after_init;
unsigned int pgdir_shift __ro_after_init = 39;
@@ -112,6 +115,7 @@ static bool __head check_la57_support(unsigned long physaddr)
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
+ unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
@@ -234,6 +238,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
+ /* Clear the memory encryption mask from the decrypted section */
+ vaddr = (unsigned long)__start_data_decrypted;
+ vaddr_end = (unsigned long)__end_data_decrypted;
+ for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+ i = pmd_index(vaddr);
+ pmd[i] -= sme_get_me_mask();
+ }
+
/*
* Return the SME encryption mask (if SME is active) to be used as a
* modifier for the initial pgdir entry programmed into CR3.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a4..511b875 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -89,6 +89,22 @@ PHDRS {
note PT_NOTE FLAGS(0); /* ___ */
}
+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis, but this section is PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define DATA_DECRYPTED_SECTION \
+ . = ALIGN(PMD_SIZE); \
+ __start_data_decrypted = .; \
+ *(.data..decrypted); \
+ __end_data_decrypted = .; \
+ . = ALIGN(PMD_SIZE); \
+
+
SECTIONS
{
#ifdef CONFIG_X86_32
@@ -171,6 +187,8 @@ SECTIONS
/* rarely changed data like cpu maps */
READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
+ DATA_DECRYPTED_SECTION
+
/* End of data section */
_edata = .;
} :data
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 7ae3686..ccf6e2b 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -59,6 +59,8 @@
(_PAGE_PAT | _PAGE_PWT))
#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
+#define PTE_FLAGS_ENC_WP ((PTE_FLAGS_ENC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT)) /* PTE_FLAGS_ENC with its cache bits replaced by PAT|PWT; NOTE(review): presumably the write-protect memory type - confirm against the PAT setup */
struct sme_populate_pgd_data {
void *pgtable_area;
@@ -72,10 +74,28 @@ struct sme_populate_pgd_data {
unsigned long vaddr_end;
};
+struct sme_workarea_data { /* handed to build_workarea_map() through its 'wa' parameter */
+ unsigned long kernel_start; /* kernel range as a start/end/len triple */
+ unsigned long kernel_end;
+ unsigned long kernel_len;
+
+ unsigned long initrd_start; /* initrd range as a start/end/len triple */
+ unsigned long initrd_end;
+ unsigned long initrd_len;
+
+ unsigned long workarea_start; /* workarea range as a start/end/len triple */
+ unsigned long workarea_end;
+ unsigned long workarea_len;
+
+ unsigned long decrypted_base; /* NOTE(review): presumably mirrors the decrypted_base offset build_workarea_map() adds to form the non-identity decrypted addresses - confirm */
+};
+
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";
+extern char __start_data_decrypted[], __end_data_decrypted[];
+
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
@@ -219,6 +239,11 @@ static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}
+static void __init sme_map_range_encrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC_WP); /* NOTE(review): only the PTE flags are the _WP variant; the PMD level uses plain PMD_FLAGS_ENC - confirm this is intended */
+}
+
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
@@ -266,19 +291,17 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return entries + tables;
}
-void __init sme_encrypt_kernel(struct boot_params *bp)
+static void __init build_workarea_map(struct boot_params *bp,
+ struct sme_workarea_data *wa,
+ struct sme_populate_pgd_data *ppd)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len;
unsigned long initrd_start, initrd_end, initrd_len;
- struct sme_populate_pgd_data ppd;
unsigned long pgtable_area_len;
unsigned long decrypted_base;
- if (!sme_active())
- return;
-
/*
* Prepare for encrypting the kernel and initrd by building new
* pagetables with the necessary attributes needed to encrypt the
@@ -358,17 +381,17 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* pagetables and when the new encrypted and decrypted kernel
* mappings are populated.
*/
- ppd.pgtable_area = (void *)execute_end;
+ ppd->pgtable_area = (void *)execute_end;
/*
* Make sure the current pagetable structure has entries for
* addressing the workarea.
*/
- ppd.pgd = (pgd_t *)native_read_cr3_pa();
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start;
- ppd.vaddr_end = workarea_end;
- sme_map_range_decrypted(&ppd);
+ ppd->pgd = (pgd_t *)native_read_cr3_pa();
+ ppd->paddr = workarea_start;
+ ppd->vaddr = workarea_start;
+ ppd->vaddr_end = workarea_end;
+ sme_map_range_decrypted(ppd);
/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
@@ -379,9 +402,9 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* then be populated with new PUDs and PMDs as the encrypted and
* decrypted kernel mappings are created.
*/
- ppd.pgd = ppd.pgtable_area;
- memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
- ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
+ ppd->pgd = ppd->pgtable_area;
+ memset(ppd->pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+ ppd->pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
/*
* A different PGD index/entry must be used to get different
@@ -399,75 +422,158 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
decrypted_base <<= PGDIR_SHIFT;
/* Add encrypted kernel (identity) mappings */
- ppd.paddr = kernel_start;
- ppd.vaddr = kernel_start;
- ppd.vaddr_end = kernel_end;
- sme_map_range_encrypted(&ppd);
+ ppd->paddr = kernel_start;
+ ppd->vaddr = kernel_start;
+ ppd->vaddr_end = kernel_end;
+ sme_map_range_encrypted(ppd);
/* Add decrypted, write-protected kernel (non-identity) mappings */
- ppd.paddr = kernel_start;
- ppd.vaddr = kernel_start + decrypted_base;
- ppd.vaddr_end = kernel_end + decrypted_base;
- sme_map_range_decrypted_wp(&ppd);
+ ppd->paddr = kernel_start;
+ ppd->vaddr = kernel_start + decrypted_base;
+ ppd->vaddr_end = kernel_end + decrypted_base;
+ sme_map_range_decrypted_wp(ppd);
if (initrd_len) {
/* Add encrypted initrd (identity) mappings */
- ppd.paddr = initrd_start;
- ppd.vaddr = initrd_start;
- ppd.vaddr_end = initrd_end;
- sme_map_range_encrypted(&ppd);
+ ppd->paddr = initrd_start;
+ ppd->vaddr = initrd_start;
+ ppd->vaddr_end = initrd_end;
+ sme_map_range_encrypted(ppd);
/*
* Add decrypted, write-protected initrd (non-identity) mappings
*/
- ppd.paddr = initrd_start;
- ppd.vaddr = initrd_start + decrypted_base;
- ppd.vaddr_end = initrd_end + decrypted_base;
- sme_map_range_decrypted_wp(&ppd);
+ ppd->paddr = initrd_start;
+ ppd->vaddr = initrd_start + decrypted_base;
+ ppd->vaddr_end = initrd_end + decrypted_base;
+ sme_map_range_decrypted_wp(ppd);
}
- /* Add decrypted workarea mappings to both kernel mappings */
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start;
- ppd.vaddr_end = workarea_end;
- sme_map_range_decrypted(&ppd);
+ /*
+ * When SEV is active, the kernel is already encrypted, so map the
+ * workarea (identity mapping) as encrypted. When SME is active, the
+ * kernel is not yet encrypted, so add decrypted workarea mappings
+ * to both kernel mappings.
+ */
+ ppd->paddr = workarea_start;
+ ppd->vaddr = workarea_start;
+ ppd->vaddr_end = workarea_end;
+ if (sev_active())
+ sme_map_range_encrypted(ppd);
+ else
+ sme_map_range_decrypted(ppd);
+
+ ppd->paddr = workarea_start;
+ ppd->vaddr = workarea_start + decrypted_base;
+ ppd->vaddr_end = workarea_end + decrypted_base;
+ sme_map_range_decrypted(ppd);