On Thu, Nov 12, 2015 at 03:40:21PM +0000, Matt Fleming wrote: > This change is a prerequisite for pending patches that switch to a > dedicated EFI page table, instead of using 'trampoline_pgd' which > shares PGD entries with 'swapper_pg_dir'. The pending patches make it > impossible to dereference the runtime service function pointer without > first switching %cr3. > > It's true that we now have duplicated switching code in > efi_call_virt() and efi_call_phys_{prolog,epilog}() but we are > sacrificing code duplication for a little more clarity and the ease of > writing the page table switching code in C instead of asm. > > Cc: Borislav Petkov <bp@xxxxxxxxx> > Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@xxxxxxxxx> > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> > Cc: Dave Jones <davej@xxxxxxxxxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: H. Peter Anvin <hpa@xxxxxxxxx> > Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> > Cc: Andy Lutomirski <luto@xxxxxxxxxx> > Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>, > Cc: Stephen Smalley <sds@xxxxxxxxxxxxx> > Signed-off-by: Matt Fleming <matt@xxxxxxxxxxxxxxxxxxx> > --- > arch/x86/include/asm/efi.h | 25 +++++++++++++++++++++ > arch/x86/platform/efi/efi_64.c | 24 ++++++++++----------- > arch/x86/platform/efi/efi_stub_64.S | 43 ------------------------------------- > 3 files changed, 36 insertions(+), 56 deletions(-) > > diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h > index cfee9d4b02af..f9d99d4e7b1a 100644 > --- a/arch/x86/include/asm/efi.h > +++ b/arch/x86/include/asm/efi.h > @@ -3,6 +3,7 @@ > > #include <asm/fpu/api.h> > #include <asm/pgtable.h> > +#include <asm/tlb.h> > > /* > * We map the EFI regions needed for runtime services non-contiguously, > @@ -64,6 +65,17 @@ extern u64 asmlinkage efi_call(void *fp, ...); > > #define efi_call_phys(f, args...) efi_call((f), args) > > +/* > + * Scratch space used for switching the pagetable in the EFI stub > + */ > +struct efi_scratch { > + u64 r15; > + u64 prev_cr3; > + pgd_t *efi_pgt; > + bool use_pgd; > + u64 phys_stack; > +} __packed; > + > #define efi_call_virt(f, ...) \ > ({ \ > efi_status_t __s; \ > @@ -71,7 +83,20 @@ extern u64 asmlinkage efi_call(void *fp, ...); > efi_sync_low_kernel_mappings(); \ > preempt_disable(); \ > __kernel_fpu_begin(); \ > + \ > + if (efi_scratch.use_pgd) { \ > + efi_scratch.prev_cr3 = read_cr3(); \ > + write_cr3((unsigned long)efi_scratch.efi_pgt); \ > + __flush_tlb_all(); \ > + } \ > + \ > __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ > + \ > + if (efi_scratch.use_pgd) { \ > + write_cr3(efi_scratch.prev_cr3); \ > + __flush_tlb_all(); \ > + } \ > + \ > __kernel_fpu_end(); \ > preempt_enable(); \ > __s; \ > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c > index 634536034e32..ab5f14a886cc 100644 > --- a/arch/x86/platform/efi/efi_64.c > +++ b/arch/x86/platform/efi/efi_64.c > @@ -47,16 +47,7 @@ > */ > static u64 efi_va = EFI_VA_START; > > -/* > - * Scratch space used for switching the pagetable in the EFI stub > - */ > -struct efi_scratch { > - u64 r15; > - u64 prev_cr3; > - pgd_t *efi_pgt; > - bool use_pgd; > - u64 phys_stack; > -} __packed; > +struct efi_scratch efi_scratch; > > static void __init early_code_mapping_set_exec(int executable) > { > @@ -83,8 +74,11 @@ pgd_t * __init efi_call_phys_prolog(void) > int pgd; > int n_pgds; > > - if (!efi_enabled(EFI_OLD_MEMMAP)) > - return NULL; > + if (!efi_enabled(EFI_OLD_MEMMAP)) { > + save_pgd = (pgd_t *)read_cr3(); > + write_cr3((unsigned long)efi_scratch.efi_pgt); > + goto out; > + } > > early_code_mapping_set_exec(1); > So this one is called in phys_efi_set_virtual_address_map() like this: ---- save_pgd = efi_call_phys_prolog(); /* Disable interrupts around EFI calls: */ local_irq_save(flags); <--- MARKER status = efi_call_phys(efi_phys.set_virtual_address_map, memory_map_size, descriptor_size, descriptor_version, virtual_map); local_irq_restore(flags); efi_call_phys_epilog(save_pgd); --- Now, if you look at MARKER, the asm looks like this here: .loc 1 91 0 call efi_call_phys_prolog # movq %rax, %r15 #, save_pgd .file 6 "./arch/x86/include/asm/irqflags.h" .loc 6 20 0 #APP # 20 "./arch/x86/include/asm/irqflags.h" 1 # __raw_save_flags pushf ; pop %r14 # flags That PUSHF implicitly pushes on the stack pointed by %rsp. But(!) we have switched the pagetable (i.e., %cr3 has efi_scratch.efi_pgt) and we're pushing to the VA where the stack *was* but is not anymore. Or maybe it is because you're copying all the PUDs. It is still not 100% clean, IMHO. Can you do the prolog/epilog calls inside the IRQs-off section? Btw, it was crap like that why I wanted to do SWITCH_PGT in asm... -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- To unsubscribe from this list: send the line "unsubscribe linux-efi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html