I have root caused the memory corruption panics/hangs that I've been experiencing during boot with the latest 4.4.110 kernel. The problem as was suspected by Andy Lutomirski is with interaction between PTI and EFI. It may affect any system that has EFI bios. I have not verified if it can affect any other kernel beside 4.4.110 Attached is the fix for this issue with explanations that Steve Sistare and I developed.
From 1189f3568a90ddd40e1418b9687def5d89153ee3 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Date: Thu, 11 Jan 2018 06:50:25 -0800 Subject: [PATCH] x86/pti/efi: broken conversion from efi to kernel page table In entry_64.S we have code like this: /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax movq %rax, %cr3 2: /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ call do_nmi With this instruction: andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax We unconditionally switch from whatever our CR3 was to kernel page table. But, in arch/x86/platform/efi/efi_64.c We temporarily set a different page table, that does not have the kernel page table with 0x1000 offset from it. Look in efi_thunk() and efi_thunk_set_virtual_address_map(). So, while CR3 points to the other page table, we get an NMI interrupt, and clear 0x1000 from CR3, resulting in a bogus CR3 if the 0x1000 bit was set. The efi page table comes from realmode/rm/trampoline_64.S: arch/x86/realmode/rm/trampoline_64.S 141 .bss 142 .balign PAGE_SIZE 143 GLOBAL(trampoline_pgd) .space PAGE_SIZE Notice: alignment is PAGE_SIZE, so after applying KAISER_SHADOW_PGD_OFFSET which equal to PAGE_SIZE, we can get a different page table. But, even if we fix alignment, here the trampoline binary is later copied into dynamically allocated memory in reserve_real_mode(), so we need to fix that place as well. Fixes: 8a43ddfb93a0 ("KAISER: Kernel Address Isolation") Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Reviewed-by: Steven Sistare <steven.sistare@xxxxxxxxxx> --- arch/x86/include/asm/kaiser.h | 8 ++++++++ arch/x86/realmode/init.c | 4 +++- arch/x86/realmode/rm/trampoline_64.S | 3 ++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 802bbbdfe143..e087bd7a8d29 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -19,6 +19,12 @@ #define KAISER_SHADOW_PGD_OFFSET 0x1000 +/* + * A page table address must have this alignment to stay the same when + * KAISER_SHADOW_PGD_OFFSET mask is applied + */ +#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1) + #ifdef __ASSEMBLY__ #ifdef CONFIG_PAGE_TABLE_ISOLATION @@ -71,6 +77,8 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax #else /* CONFIG_PAGE_TABLE_ISOLATION */ +#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE + .macro SWITCH_KERNEL_CR3 .endm .macro SWITCH_USER_CR3 diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 0b7a63d98440..cfecb7d6c6a8 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -1,5 +1,6 @@ #include <linux/io.h> #include <linux/memblock.h> +#include <linux/kaiser.h> #include <asm/cacheflush.h> #include <asm/pgtable.h> @@ -15,7 +16,8 @@ void __init reserve_real_mode(void) size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); /* Has to be under 1M so we can execute real-mode AP code. */ - mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); + mem = memblock_find_in_range(0, 1 << 20, size, + KAISER_KERNEL_PGD_ALIGNMENT); if (!mem) panic("Cannot allocate trampoline\n"); diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index dac7b20d2f9d..781cca63f795 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -30,6 +30,7 @@ #include <asm/msr.h> #include <asm/segment.h> #include <asm/processor-flags.h> +#include <asm/kaiser.h> #include "realmode.h" .text @@ -139,7 +140,7 @@ tr_gdt: tr_gdt_end: .bss - .balign PAGE_SIZE + .balign KAISER_KERNEL_PGD_ALIGNMENT GLOBAL(trampoline_pgd) .space PAGE_SIZE .balign 8 -- 1.8.3.1