On 6/15/2017 5:03 AM, Borislav Petkov wrote: > On Wed, Jun 07, 2017 at 02:18:27PM -0500, Tom Lendacky wrote: >> Provide support so that kexec can be used to boot a kernel when SME is >> enabled. >> >> Support is needed to allocate pages for kexec without encryption. This >> is needed in order to be able to reboot in the kernel in the same manner >> as originally booted. >> >> Additionally, when shutting down all of the CPUs we need to be sure to >> flush the caches and then halt. This is needed when booting from a state >> where SME was not active into a state where SME is active (or vice-versa). >> Without these steps, it is possible for cache lines to exist for the same >> physical location but tagged both with and without the encryption bit. This >> can cause random memory corruption when caches are flushed depending on >> which cacheline is written last. >> >> Signed-off-by: Tom Lendacky <thomas.lendacky at amd.com> >> --- >> arch/x86/include/asm/init.h | 1 + >> arch/x86/include/asm/kexec.h | 8 ++++++++ >> arch/x86/include/asm/pgtable_types.h | 1 + >> arch/x86/kernel/machine_kexec_64.c | 35 +++++++++++++++++++++++++++++++++- >> arch/x86/kernel/process.c | 17 +++++++++++++++-- >> arch/x86/mm/ident_map.c | 12 ++++++++---- >> include/linux/kexec.h | 14 ++++++++++++++ >> kernel/kexec_core.c | 6 ++++++ >> 8 files changed, 87 insertions(+), 7 deletions(-) >> >> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h >> index 474eb8c..05c4aa0 100644 >> --- a/arch/x86/include/asm/init.h >> +++ b/arch/x86/include/asm/init.h >> @@ -7,6 +7,7 @@ struct x86_mapping_info { >> unsigned long page_flag; /* page flag for PMD or PUD entry */ >> unsigned long offset; /* ident mapping offset */ >> bool direct_gbpages; /* PUD level 1GB page support */ >> + unsigned long kernpg_flag; /* kernel pagetable flag override */ >> }; >> >> int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, >> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h >> index 70ef205..e8183ac 100644 >> --- a/arch/x86/include/asm/kexec.h >> +++ b/arch/x86/include/asm/kexec.h >> @@ -207,6 +207,14 @@ struct kexec_entry64_regs { >> uint64_t r15; >> uint64_t rip; >> }; >> + >> +extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, >> + gfp_t gfp); >> +#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages >> + >> +extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); >> +#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages >> + >> #endif >> >> typedef void crash_vmclear_fn(void); >> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h >> index ce8cb1c..0f326f4 100644 >> --- a/arch/x86/include/asm/pgtable_types.h >> +++ b/arch/x86/include/asm/pgtable_types.h >> @@ -213,6 +213,7 @@ enum page_cache_mode { >> #define PAGE_KERNEL __pgprot(__PAGE_KERNEL | _PAGE_ENC) >> #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) >> #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) >> +#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC) >> #define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) >> #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) >> #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) >> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c >> index 6f5ca4e..35e069a 100644 >> --- a/arch/x86/kernel/machine_kexec_64.c >> +++ b/arch/x86/kernel/machine_kexec_64.c >> @@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) >> set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); >> } >> pte = pte_offset_kernel(pmd, vaddr); >> - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); >> + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); >> return 0; >> err: >> free_transition_pgtable(image); >> @@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) >> .alloc_pgt_page = alloc_pgt_page, >> .context = image, >> .page_flag = __PAGE_KERNEL_LARGE_EXEC, >> + .kernpg_flag = _KERNPG_TABLE_NOENC, >> }; >> unsigned long mstart, mend; >> pgd_t *level4p; >> @@ -602,3 +603,35 @@ void arch_kexec_unprotect_crashkres(void) >> { >> kexec_mark_crashkres(false); >> } >> + >> +int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) >> +{ >> + int ret; >> + >> + if (sme_active()) { > > What happened to flipping the logic and saving an indentation level here? > >> + /* >> + * If SME is active we need to be sure that kexec pages are >> + * not encrypted because when we boot to the new kernel the >> + * pages won't be accessed encrypted (initially). >> + */ >> + ret = set_memory_decrypted((unsigned long)vaddr, pages); >> + if (ret) >> + return ret; >> + >> + if (gfp & __GFP_ZERO) >> + memset(vaddr, 0, pages * PAGE_SIZE); >> + } > > This is still zeroing the memory a second time. That function has missed > all my comments from last time. Hmmm... not sure what happened, I thought I made changes here. I'll take care of it. Thanks, Tom >