at 12:10 PM, Nadav Amit <nadav.amit@xxxxxxxxx> wrote: > at 11:58 AM, Andy Lutomirski <luto@xxxxxxxxxx> wrote: > >> On Mon, Aug 27, 2018 at 11:54 AM, Nadav Amit <nadav.amit@xxxxxxxxx> wrote: >>>> On Mon, Aug 27, 2018 at 10:34 AM, Nadav Amit <nadav.amit@xxxxxxxxx> wrote: >>>> What do you all think? >>> >>> I agree in general. But I think that current->mm would need to be loaded, as >>> otherwise I am afraid it would break switch_mm_irqs_off(). >> >> What breaks? > > Actually nothing. I just saw the IBPB stuff regarding tsk, but it should not > matter. So here is what I got. It certainly needs some cleanup, but it boots. Let me know how crappy you find it... diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index bbc796eb0a3b..336779650a41 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -343,4 +343,24 @@ static inline unsigned long __get_current_cr3_fast(void) return cr3; } +typedef struct { + struct mm_struct *prev; +} temporary_mm_state_t; + +static inline temporary_mm_state_t use_temporary_mm(struct mm_struct *mm) +{ + temporary_mm_state_t state; + + lockdep_assert_irqs_disabled(); + state.prev = this_cpu_read(cpu_tlbstate.loaded_mm); + switch_mm_irqs_off(NULL, mm, current); + return state; +} + +static inline void unuse_temporary_mm(temporary_mm_state_t prev) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(NULL, prev.prev, current); +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5715647fc4fe..ef62af9a0ef7 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -976,6 +976,10 @@ static inline void __meminit init_trampoline_default(void) /* Default trampoline pgd value */ trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; } + +void __init patching_mm_init(void); +#define patching_mm_init patching_mm_init + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit 
init_trampoline(void); # else diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 054765ab2da2..9f44262abde0 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -116,6 +116,9 @@ extern unsigned int ptrs_per_p4d; #define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) +#define TEXT_POKE_PGD_ENTRY -5UL +#define TEXT_POKE_ADDR (TEXT_POKE_PGD_ENTRY << PGDIR_SHIFT) + #define __VMALLOC_BASE_L4 0xffffc90000000000UL #define __VMALLOC_BASE_L5 0xffa0000000000000UL diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 99fff853c944..840c72ec8c4f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -505,6 +505,9 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); +struct mm_struct; +void set_mm_pte_vaddr(struct mm_struct *mm, unsigned long vaddr, pte_t pte); + #ifdef CONFIG_X86_32 extern void native_pagetable_init(void); #else diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 2ecd34e2d46c..cb364ea5b19d 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -38,4 +38,6 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern struct mm_struct *patching_mm; + #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a481763a3776..fd8a950b0d62 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -11,6 +11,7 @@ #include <linux/stop_machine.h> #include <linux/slab.h> 
#include <linux/kdebug.h> +#include <linux/mmu_context.h> #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> @@ -701,8 +702,36 @@ void *text_poke(void *addr, const void *opcode, size_t len) WARN_ON(!PageReserved(pages[0])); pages[1] = virt_to_page(addr + PAGE_SIZE); } - BUG_ON(!pages[0]); + local_irq_save(flags); + BUG_ON(!pages[0]); + + /* + * During initial boot, it is hard to initialize patching_mm due to + * dependencies in boot order. + */ + if (patching_mm) { + pte_t pte; + temporary_mm_state_t prev; + + prev = use_temporary_mm(patching_mm); + pte = mk_pte(pages[0], PAGE_KERNEL); + set_mm_pte_vaddr(patching_mm, TEXT_POKE_ADDR, pte); + pte = mk_pte(pages[1], PAGE_KERNEL); + set_mm_pte_vaddr(patching_mm, TEXT_POKE_ADDR + PAGE_SIZE, pte); + + memcpy((void *)(TEXT_POKE_ADDR | ((unsigned long)addr & ~PAGE_MASK)), + opcode, len); + + set_mm_pte_vaddr(patching_mm, TEXT_POKE_ADDR, __pte(0)); + set_mm_pte_vaddr(patching_mm, TEXT_POKE_ADDR + PAGE_SIZE, __pte(0)); + local_flush_tlb(); + sync_core(); + + unuse_temporary_mm(prev); + goto out; + } + set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); if (pages[1]) set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); @@ -715,6 +744,7 @@ void *text_poke(void *addr, const void *opcode, size_t len) sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. 
*/ +out: for (i = 0; i < len; i++) BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); local_irq_restore(flags); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a688617c727e..bd0d629e3831 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -54,6 +54,7 @@ #include <asm/init.h> #include <asm/uv/uv.h> #include <asm/setup.h> +#include <asm/text-patching.h> #include "mm_internal.h" @@ -285,14 +286,14 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) __set_pte_vaddr(pud, vaddr, new_pte); } -void set_pte_vaddr(unsigned long vaddr, pte_t pteval) +void set_mm_pte_vaddr(struct mm_struct *mm, unsigned long vaddr, pte_t pteval) { pgd_t *pgd; p4d_t *p4d_page; pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval)); - pgd = pgd_offset_k(vaddr); + pgd = pgd_offset(mm, vaddr); if (pgd_none(*pgd)) { printk(KERN_ERR "PGD FIXMAP MISSING, it should be setup in head.S!\n"); @@ -303,6 +304,11 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) set_pte_vaddr_p4d(p4d_page, vaddr, pteval); } +void set_pte_vaddr(unsigned long vaddr, pte_t pteval) +{ + set_mm_pte_vaddr(&init_mm, vaddr, pteval); +} + pmd_t * __init populate_extra_pmd(unsigned long vaddr) { pgd_t *pgd; @@ -1399,6 +1405,17 @@ unsigned long memory_block_size_bytes(void) return memory_block_size_probed; } +struct mm_struct *patching_mm; +EXPORT_SYMBOL(patching_mm); + +void __init patching_mm_init(void) +{ + populate_extra_pte(TEXT_POKE_ADDR); + populate_extra_pte(TEXT_POKE_ADDR + PAGE_SIZE); + + patching_mm = copy_init_mm(); +} + #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f59639afaa39..c95d2240c23a 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1083,6 +1083,10 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, static inline void init_espfix_bsp(void) { } #endif +#ifndef patching_mm_init +static inline void patching_mm_init(void) { } +#endif + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 108ede99e533..ac0a675678f5 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -74,6 +74,7 @@ extern void exit_itimers(struct signal_struct *); extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); +struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); diff --git a/init/main.c b/init/main.c index 3b4ada11ed52..9a313efc80a6 100644 --- a/init/main.c +++ b/init/main.c @@ -724,6 +724,7 @@ asmlinkage __visible void __init start_kernel(void) taskstats_init_early(); delayacct_init(); + patching_mm_init(); check_bugs(); acpi_subsystem_init(); diff --git a/kernel/fork.c b/kernel/fork.c index 1b27babc4c78..325d1a5ca903 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1249,9 +1249,9 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. 
 */ -static struct mm_struct *dup_mm(struct task_struct *tsk) +static struct mm_struct *dup_mm(struct task_struct *tsk, struct mm_struct *oldmm) { - struct mm_struct *mm, *oldmm = current->mm; + struct mm_struct *mm; int err; mm = allocate_mm(); @@ -1317,7 +1317,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) } retval = -ENOMEM; - mm = dup_mm(tsk); + mm = dup_mm(tsk, current->mm); if (!mm) goto fail_nomem; @@ -2082,6 +2082,11 @@ struct task_struct *fork_idle(int cpu) return task; } +struct mm_struct *copy_init_mm(void) +{ + return dup_mm(NULL, &init_mm); +} + /* * Ok, this is the main fork-routine. * -- 2.17.1