This is a prototype that allows modules to share 2MB text pages with other modules and BPF programs. The current version covers only core_layout. --- arch/x86/Kconfig | 1 + arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------ arch/x86/kernel/module.c | 1 + kernel/module/main.c | 23 +++++++++++++---------- kernel/module/strict_rwx.c | 3 --- kernel/trace/ftrace.c | 3 ++- 6 files changed, 41 insertions(+), 20 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f9920f1341c8..0b1ea05a1da6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -91,6 +91,7 @@ config X86 select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if X86_64 select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4f3204364caa..0e47a558c5bc 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(instr, insn_buff, insn_buff_sz); + if (system_state < SYSTEM_RUNNING) { + text_poke_early(instr, insn_buff, insn_buff_sz); + } else { + mutex_lock(&text_mutex); + text_poke(instr, insn_buff, insn_buff_sz); + mutex_unlock(&text_mutex); + } next: optimize_nops(instr, a->instrlen); @@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) optimize_nops(bytes, len); DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + if (system_state == SYSTEM_BOOTING) { + text_poke_early(addr, bytes, len); + } else { + mutex_lock(&text_mutex); + text_poke(addr, bytes, len); + mutex_unlock(&text_mutex); + } } } } @@ -568,7 +580,13 @@ void __init_or_module noinline 
apply_returns(s32 *start, s32 *end) if (len == insn.length) { DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, bytes, len); + } else { + mutex_lock(&text_mutex); + text_poke(addr, bytes, len); + mutex_unlock(&text_mutex); + } } } } @@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) */ DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(addr, &poison, 4); + text_poke(addr, &poison, 4); } } @@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, /* Pad the rest with nops */ add_nops(insn_buff + used, p->len - used); - text_poke_early(p->instr, insn_buff, p->len); + text_poke(p->instr, insn_buff, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], @@ -1699,7 +1717,7 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void * struct text_poke_loc tp; if (unlikely(system_state == SYSTEM_BOOTING)) { - text_poke_early(addr, opcode, len); + text_poke(addr, opcode, len); return; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b1abf663417c..577e31647dc4 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -229,6 +229,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, bool early = me->state == MODULE_STATE_UNFORMED; void *(*write)(void *, const void *, size_t) = memcpy; + early = false; if (!early) { write = text_poke; mutex_lock(&text_mutex); diff --git a/kernel/module/main.c b/kernel/module/main.c index a4e4d84b6f4e..b44806e31a56 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -53,6 +53,7 @@ #include <linux/bsearch.h> #include <linux/dynamic_debug.h> #include <linux/audit.h> +#include <linux/bpf.h> #include <uapi/linux/module.h> #include "internal.h" @@ -1203,7 +1204,7 
@@ static void free_module(struct module *mod) lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size); /* Finally, free the core (containing the module structure) */ - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC vfree(mod->data_layout.base); #endif @@ -1321,7 +1322,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { - sym[i].st_value = kernel_symbol_value(ksym); + unsigned long val = kernel_symbol_value(ksym); + bpf_arch_text_copy(&sym[i].st_value, &val, sizeof(val)); break; } @@ -1342,7 +1344,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) secbase = (unsigned long)mod_percpu(mod); else secbase = info->sechdrs[sym[i].st_shndx].sh_addr; - sym[i].st_value += secbase; + secbase += sym[i].st_value; + bpf_arch_text_copy(&sym[i].st_value, &secbase, sizeof(secbase)); break; } } @@ -2123,7 +2126,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc(mod->core_layout.size); + ptr = vmalloc_exec(mod->core_layout.size, PAGE_SIZE); /* * The pointer to this block is stored in the module structure * which is inside the block. 
Just mark it as not being a @@ -2133,7 +2136,7 @@ static int move_module(struct module *mod, struct load_info *info) if (!ptr) return -ENOMEM; - memset(ptr, 0, mod->core_layout.size); +/* memset(ptr, 0, mod->core_layout.size); */ mod->core_layout.base = ptr; if (mod->init_layout.size) { @@ -2146,7 +2149,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); return -ENOMEM; } memset(ptr, 0, mod->init_layout.size); @@ -2156,7 +2159,7 @@ static int move_module(struct module *mod, struct load_info *info) #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC /* Do the allocs. */ - ptr = vzalloc(mod->data_layout.size); + ptr = module_alloc(mod->data_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2164,7 +2167,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_not_leak(ptr); if (!ptr) { - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); module_memfree(mod->init_layout.base); return -ENOMEM; } @@ -2189,7 +2192,7 @@ static int move_module(struct module *mod, struct load_info *info) dest = mod->core_layout.base + shdr->sh_entsize; if (shdr->sh_type != SHT_NOBITS) - memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); + bpf_arch_text_copy(dest, (void *)shdr->sh_addr, shdr->sh_size); /* Update sh_addr to point to copy in image. 
*/ shdr->sh_addr = (unsigned long)dest; pr_debug("\t0x%lx %s\n", @@ -2345,7 +2348,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC vfree(mod->data_layout.base); #endif diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index 14fbea66f12f..d392eb7bf574 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -85,7 +85,6 @@ void module_enable_x(const struct module *mod) !PAGE_ALIGNED(mod->init_layout.base)) return; - frob_text(&mod->core_layout, set_memory_x); frob_text(&mod->init_layout, set_memory_x); } @@ -98,9 +97,7 @@ void module_enable_ro(const struct module *mod, bool after_init) return; #endif - set_vm_flush_reset_perms(mod->core_layout.base); set_vm_flush_reset_perms(mod->init_layout.base); - frob_text(&mod->core_layout, set_memory_ro); frob_rodata(&mod->data_layout, set_memory_ro); frob_text(&mod->init_layout, set_memory_ro); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 439e2ab6905e..818418d5b853 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3142,6 +3142,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) if (mod) rec_flags |= FTRACE_FL_DISABLED; + ftrace_arch_code_modify_prepare(); for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -3163,7 +3164,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) update_cnt++; } } - + ftrace_arch_code_modify_post_process(); stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += update_cnt; -- 2.30.2