kvmclock defines few static variables which are shared with the hypervisor during the kvmclock initialization. When SEV is active, memory is encrypted with a guest-specific key, and if guest OS wants to share the memory region with hypervisor then it must clear the C-bit before sharing it. Currently, we use kernel_physical_mapping_init() to split large pages before clearing the C-bit on shared pages. But it fails when called from the kvmclock initialization (mainly because memblock allocator is not ready that early during boot). Add a __decrypted section attribute which can be used when defining such shared variable. The so-defined variables will be placed in the .data..decrypted section. This section is mapped with C=0 early during boot, we also ensure that the initialized values are updated to match with C=0 (i.e perform an in-place decryption). The .data..decrypted section is PMD-aligned and sized so that we avoid the need to split the large pages when mapping the section. The sme_encrypt_kernel() was used to perform the in-place encryption of the Linux kernel and initrd when SME is active. The routine has been enhanced to decrypt the .data..decrypted section for both SME and SEV cases. Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx> Cc: Tom Lendacky <thomas.lendacky@xxxxxxx> Cc: kvm@xxxxxxxxxxxxxxx Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Cc: linux-kernel@xxxxxxxxxxxxxxx Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> Cc: kvm@xxxxxxxxxxxxxxx Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx> --- arch/x86/include/asm/mem_encrypt.h | 6 +++ arch/x86/kernel/head64.c | 11 +++++ arch/x86/kernel/vmlinux.lds.S | 17 +++++++ arch/x86/mm/mem_encrypt_identity.c | 94 ++++++++++++++++++++++++++++++++------ 4 files changed, 113 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c064383..802b2eb 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -52,6 +52,8 @@ void __init mem_encrypt_init(void); bool sme_active(void); bool sev_active(void); +#define __decrypted __attribute__((__section__(".data..decrypted"))) + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -77,6 +79,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +#define __decrypted + #endif /* CONFIG_AMD_MEM_ENCRYPT */ /* @@ -88,6 +92,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; #define __sme_pa(x) (__pa(x) | sme_me_mask) #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) +extern char __start_data_decrypted[], __end_data_decrypted[]; + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8047379..af39d68 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -112,6 +112,7 @@ static bool __head check_la57_support(unsigned long physaddr) unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { + unsigned long vaddr, vaddr_end; unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; @@ -234,6 +235,16 @@ unsigned long __head __startup_64(unsigned long physaddr, /* Encrypt the kernel and related (if SME is active) */ sme_encrypt_kernel(bp); + /* Clear the memory encryption mask from the .data..decrypted section. */ + if (mem_encrypt_active()) { + vaddr = (unsigned long)__start_data_decrypted; + vaddr_end = (unsigned long)__end_data_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + i = pmd_index(vaddr); + pmd[i] -= sme_get_me_mask(); + } + } + /* * Return the SME encryption mask (if SME is active) to be used as a * modifier for the initial pgdir entry programmed into CR3. diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8bde0a4..78d3169 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -89,6 +89,21 @@ PHDRS { note PT_NOTE FLAGS(0); /* ___ */ } +/* + * This section contains data which will be mapped as decrypted. Memory + * encryption operates on a page basis. Make this section PMD-aligned + * to avoid spliting the pages while mapping the section early. + * + * Note: We use a separate section so that only this section gets + * decrypted to avoid exposing more than we wish. + */ +#define DATA_DECRYPTED \ + . = ALIGN(PMD_SIZE); \ + __start_data_decrypted = .; \ + *(.data..decrypted); \ + . = ALIGN(PMD_SIZE); \ + __end_data_decrypted = .; \ + SECTIONS { #ifdef CONFIG_X86_32 @@ -171,6 +186,8 @@ SECTIONS /* rarely changed data like cpu maps */ READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) + DATA_DECRYPTED + /* End of data section */ _edata = .; } :data diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 7659e65..08e70ba 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -51,6 +51,8 @@ (_PAGE_PAT | _PAGE_PWT)) #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) +#define PMD_FLAGS_ENC_WP ((PMD_FLAGS_ENC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) #define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) @@ -59,6 +61,8 @@ (_PAGE_PAT | _PAGE_PWT)) #define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) +#define PTE_FLAGS_ENC_WP ((PTE_FLAGS_ENC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) struct sme_populate_pgd_data { void *pgtable_area; @@ -231,6 +235,11 @@ static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); } +static void __init sme_map_range_encrypted_wp(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_ENC_WP, PTE_FLAGS_ENC_WP); +} + static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); @@ -378,7 +387,10 @@ static void __init build_workarea_map(struct boot_params *bp, ppd->paddr = workarea_start; ppd->vaddr = workarea_start; ppd->vaddr_end = workarea_end; - sme_map_range_decrypted(ppd); + if (sev_active()) + sme_map_range_encrypted(ppd); + else + sme_map_range_decrypted(ppd); /* Flush the TLB - no globals so cr3 is enough */ native_write_cr3(__native_read_cr3()); @@ -435,16 +447,27 @@ static void __init build_workarea_map(struct boot_params *bp, sme_map_range_decrypted_wp(ppd); } - /* Add decrypted workarea mappings to both kernel mappings */ + /* + * When SEV is active, kernel is already encrypted hence mapping + * the initial workarea_start as encrypted. When SME is active, + * the kernel is not encrypted hence add decrypted workarea + * mappings to both kernel mappings. + */ ppd->paddr = workarea_start; ppd->vaddr = workarea_start; ppd->vaddr_end = workarea_end; - sme_map_range_decrypted(ppd); + if (sev_active()) + sme_map_range_encrypted(ppd); + else + sme_map_range_decrypted(ppd); ppd->paddr = workarea_start; ppd->vaddr = workarea_start + decrypted_base; ppd->vaddr_end = workarea_end + decrypted_base; - sme_map_range_decrypted(ppd); + if (sev_active()) + sme_map_range_encrypted(ppd); + else + sme_map_range_decrypted(ppd); wa->kernel_start = kernel_start; wa->kernel_end = kernel_end; @@ -487,28 +510,69 @@ static void __init teardown_workarea_map(struct sme_workarea_data *wa, native_write_cr3(__native_read_cr3()); } +static void __init decrypt_shared_data(struct sme_workarea_data *wa, + struct sme_populate_pgd_data *ppd) +{ + unsigned long decrypted_start, decrypted_end, decrypted_len; + + /* Physical addresses of decrypted data section */ + decrypted_start = __pa_symbol(__start_data_decrypted); + decrypted_end = ALIGN(__pa_symbol(__end_data_decrypted), PMD_PAGE_SIZE); + decrypted_len = decrypted_end - decrypted_start; + + if (!decrypted_len) + return; + + /* Add decrypted mapping for the section (identity) */ + ppd->paddr = decrypted_start; + ppd->vaddr = decrypted_start; + ppd->vaddr_end = decrypted_end; + sme_map_range_decrypted(ppd); + + /* Add encrypted-wp mapping for the section (non-identity) */ + ppd->paddr = decrypted_start; + ppd->vaddr = decrypted_start + wa->decrypted_base; + ppd->vaddr_end = decrypted_end + wa->decrypted_base; + sme_map_range_encrypted_wp(ppd); + + /* Perform in-place decryption */ + sme_encrypt_execute(decrypted_start, + decrypted_start + wa->decrypted_base, + decrypted_len, wa->workarea_start, + (unsigned long)ppd->pgd); + + ppd->vaddr = decrypted_start + wa->decrypted_base; + ppd->vaddr_end = decrypted_end + wa->decrypted_base; + sme_clear_pgd(ppd); +} + void __init sme_encrypt_kernel(struct boot_params *bp) { struct sme_populate_pgd_data ppd; struct sme_workarea_data wa; - if (!sme_active()) + if (!mem_encrypt_active()) return; build_workarea_map(bp, &wa, &ppd); - /* When SEV is active, encrypt kernel and initrd */ - sme_encrypt_execute(wa.kernel_start, - wa.kernel_start + wa.decrypted_base, - wa.kernel_len, wa.workarea_start, - (unsigned long)ppd.pgd); - - if (wa.initrd_len) - sme_encrypt_execute(wa.initrd_start, - wa.initrd_start + wa.decrypted_base, - wa.initrd_len, wa.workarea_start, + /* When SME is active, encrypt kernel and initrd */ + if (sme_active()) { + sme_encrypt_execute(wa.kernel_start, + wa.kernel_start + wa.decrypted_base, + wa.kernel_len, wa.workarea_start, (unsigned long)ppd.pgd); + if (wa.initrd_len) + sme_encrypt_execute(wa.initrd_start, + wa.initrd_start + wa.decrypted_base, + wa.initrd_len, wa.workarea_start, + (unsigned long)ppd.pgd); + } + + /* Decrypt the contents of .data..decrypted section */ + decrypt_shared_data(&wa, &ppd); + teardown_workarea_map(&wa, &ppd); } -- 2.7.4