This is a prototype that allows modules to share 2MB text pages with other modules and BPF programs. The current version only covers core_layout. --- arch/x86/Kconfig | 1 + arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------ arch/x86/kernel/module.c | 1 + kernel/module/main.c | 23 +++++++++++++---------- kernel/module/strict_rwx.c | 3 --- kernel/trace/ftrace.c | 3 ++- 6 files changed, 41 insertions(+), 20 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fb5900e2c29a..e932bceb7f23 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -91,6 +91,7 @@ config X86 select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if X86_64 select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 62f6b8b7c4a5..c83888ec232b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(instr, insn_buff, insn_buff_sz); + if (system_state < SYSTEM_RUNNING) { + text_poke_early(instr, insn_buff, insn_buff_sz); + } else { + mutex_lock(&text_mutex); + text_poke(instr, insn_buff, insn_buff_sz); + mutex_unlock(&text_mutex); + } next: optimize_nops(instr, a->instrlen); @@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) optimize_nops(bytes, len); DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + if (system_state == SYSTEM_BOOTING) { + text_poke_early(addr, bytes, len); + } else { + mutex_lock(&text_mutex); + text_poke(addr, bytes, len); + mutex_unlock(&text_mutex); + } } } } @@ -568,7 +580,13 @@ void __init_or_module noinline 
apply_returns(s32 *start, s32 *end) if (len == insn.length) { DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, bytes, len); + } else { + mutex_lock(&text_mutex); + text_poke(addr, bytes, len); + mutex_unlock(&text_mutex); + } } } } @@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) */ DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(addr, &poison, 4); + text_poke(addr, &poison, 4); } } @@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, /* Pad the rest with nops */ add_nops(insn_buff + used, p->len - used); - text_poke_early(p->instr, insn_buff, p->len); + text_poke(p->instr, insn_buff, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], @@ -1698,7 +1716,7 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void * struct text_poke_loc tp; if (unlikely(system_state == SYSTEM_BOOTING)) { - text_poke_early(addr, opcode, len); + text_poke(addr, opcode, len); return; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 100446ffdc1d..570af623e28f 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -229,6 +229,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, bool early = me->state == MODULE_STATE_UNFORMED; void *(*write)(void *, const void *, size_t) = memcpy; + early = false; if (!early) { write = text_poke; mutex_lock(&text_mutex); diff --git a/kernel/module/main.c b/kernel/module/main.c index 57fc2821be63..c51dafa1089a 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -53,6 +53,7 @@ #include <linux/bsearch.h> #include <linux/dynamic_debug.h> #include <linux/audit.h> +#include <linux/bpf.h> #include <uapi/linux/module.h> #include "internal.h" @@ -1198,7 +1199,7 
@@ static void free_module(struct module *mod) lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size); /* Finally, free the core (containing the module structure) */ - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC vfree(mod->data_layout.base); #endif @@ -1316,7 +1317,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { - sym[i].st_value = kernel_symbol_value(ksym); + unsigned long val = kernel_symbol_value(ksym); + bpf_arch_text_copy(&sym[i].st_value, &val, sizeof(val)); break; } @@ -1337,7 +1339,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) secbase = (unsigned long)mod_percpu(mod); else secbase = info->sechdrs[sym[i].st_shndx].sh_addr; - sym[i].st_value += secbase; + secbase += sym[i].st_value; + bpf_arch_text_copy(&sym[i].st_value, &secbase, sizeof(secbase)); break; } } @@ -2118,7 +2121,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc(mod->core_layout.size); + ptr = vmalloc_exec(mod->core_layout.size, PAGE_SIZE); /* * The pointer to this block is stored in the module structure * which is inside the block. 
Just mark it as not being a @@ -2128,7 +2131,7 @@ static int move_module(struct module *mod, struct load_info *info) if (!ptr) return -ENOMEM; - memset(ptr, 0, mod->core_layout.size); +/* memset(ptr, 0, mod->core_layout.size); */ mod->core_layout.base = ptr; if (mod->init_layout.size) { @@ -2141,7 +2144,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); return -ENOMEM; } memset(ptr, 0, mod->init_layout.size); @@ -2151,7 +2154,7 @@ static int move_module(struct module *mod, struct load_info *info) #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC /* Do the allocs. */ - ptr = vmalloc(mod->data_layout.size); + ptr = module_alloc(mod->data_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2159,7 +2162,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_not_leak(ptr); if (!ptr) { - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); module_memfree(mod->init_layout.base); return -ENOMEM; } @@ -2185,7 +2188,7 @@ static int move_module(struct module *mod, struct load_info *info) dest = mod->core_layout.base + shdr->sh_entsize; if (shdr->sh_type != SHT_NOBITS) - memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); + bpf_arch_text_copy(dest, (void *)shdr->sh_addr, shdr->sh_size); /* Update sh_addr to point to copy in image. 
*/ shdr->sh_addr = (unsigned long)dest; pr_debug("\t0x%lx %s\n", @@ -2341,7 +2344,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC vfree(mod->data_layout.base); #endif diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index 14fbea66f12f..d392eb7bf574 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -85,7 +85,6 @@ void module_enable_x(const struct module *mod) !PAGE_ALIGNED(mod->init_layout.base)) return; - frob_text(&mod->core_layout, set_memory_x); frob_text(&mod->init_layout, set_memory_x); } @@ -98,9 +97,7 @@ void module_enable_ro(const struct module *mod, bool after_init) return; #endif - set_vm_flush_reset_perms(mod->core_layout.base); set_vm_flush_reset_perms(mod->init_layout.base); - frob_text(&mod->core_layout, set_memory_ro); frob_rodata(&mod->data_layout, set_memory_ro); frob_text(&mod->init_layout, set_memory_ro); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bc921a3f7ea8..8cd31dc9ac84 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3177,6 +3177,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) if (mod) rec_flags |= FTRACE_FL_DISABLED; + ftrace_arch_code_modify_prepare(); for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -3198,7 +3199,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) update_cnt++; } } - + ftrace_arch_code_modify_post_process(); stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += update_cnt; -- 2.30.2