Le 27/01/2023 à 00:36, Song Liu a écrit : > module_layout manages different types of memory (text, data, rodata, etc.) > in one allocation, which is problematic for some reasons: > > 1. It is hard to enable CONFIG_STRICT_MODULE_RWX. > 2. It is hard to use huge pages in modules (and not break strict rwx). > 3. Many archs uses module_layout for arch-specific data, but it is not > obvious how these data are used (are they RO, RX, or RW?) > > Improve the scenario by replacing 2 (or 3) module_layout per module with > up to 7 module_memory per module: > > MOD_MEM_TYPE_TEXT, > MOD_MEM_TYPE_DATA, > MOD_MEM_TYPE_RODATA, > MOD_MEM_TYPE_RO_AFTER_INIT, > MOD_MEM_TYPE_INIT_TEXT, > MOD_MEM_TYPE_INIT_DATA, > MOD_MEM_TYPE_INIT_RODATA, > > and allocating them separately. This adds slightly more entries to > mod_tree (from up to 3 entries per module, to up to 7 entries per > module). However, this at most adds a small constant overhead to > __module_address(), which is expected to be fast. > > Various archs use module_layout for different data. These data are put > into different module_memory based on their location in module_layout. > IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT; > data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc. > > module_memory simplifies quite some of the module code. For example, > ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a > different allocator for the data. kernel/module/strict_rwx.c is also > much cleaner with module_memory. > > Signed-off-by: Song Liu <song@xxxxxxxxxx> > Cc: Luis Chamberlain <mcgrof@xxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > Cc: Guenter Roeck <linux@xxxxxxxxxxxx> > > --- > > This is the preparation work for the type aware module_alloc() discussed > in [1]. While this work is not covered much in the discussion, it is a > critical step of the effort. > > As this part grows pretty big (~1000 lines, + and -), I would like get > some feedback on it, so that I know it is on the right track. > > Please share your comments. Thanks! > > Test coverage: Tested on x86_64. > Build tested by kernel test bot in [2]. The only regression in [2] was a > typo in parisc, which is also fixed. > > [1] https://lore.kernel.org/linux-mm/20221107223921.3451913-1-song@xxxxxxxxxx/T/#u > [2] https://lore.kernel.org/linux-raid/63b8827e.clJQX2wg+I+tiX7m%25lkp@xxxxxxxxx/T/#u > > Changes v1 => v2: > 1. Add data_addr_[min|max] for CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > case. > > Changes v2 => v3: > 1. Fix and remove the last use of INIT_OFFSET_MASK. > 2. Add more information in the commit log. (Luis Chamberlain) > 3. Rebase and fix issues in x86/calltrunks. > 4. Minor cleanup. > --- > arch/alpha/kernel/module.c | 3 +- > arch/arc/kernel/unwind.c | 9 +- > arch/arm/kernel/module-plts.c | 5 +- > arch/arm64/kernel/module-plts.c | 5 +- > arch/ia64/kernel/module.c | 31 ++- > arch/mips/kernel/vpe.c | 11 +- > arch/parisc/kernel/module.c | 53 +++-- > arch/powerpc/kernel/module_32.c | 10 +- > arch/s390/kernel/module.c | 30 +-- > arch/x86/kernel/callthunks.c | 5 +- > arch/x86/kernel/module.c | 4 +- > include/linux/module.h | 65 +++--- > kernel/module/internal.h | 47 ++-- > kernel/module/kallsyms.c | 60 ++--- > kernel/module/kdb.c | 17 +- > kernel/module/main.c | 378 +++++++++++++++++++------------- > kernel/module/procfs.c | 17 +- > kernel/module/strict_rwx.c | 113 +++------- > kernel/module/tree_lookup.c | 43 ++-- > 19 files changed, 497 insertions(+), 409 deletions(-) > > diff --git a/include/linux/module.h b/include/linux/module.h > index 8c5909c0076c..c5e78cb47e13 100644 > --- a/include/linux/module.h > +++ b/include/linux/module.h > @@ -320,17 +320,22 @@ struct mod_tree_node { > struct latch_tree_node node; > }; > > -struct module_layout { > - /* The actual code + data. */ > +enum mod_mem_type { > + MOD_MEM_TYPE_TEXT, > + MOD_MEM_TYPE_DATA, > + MOD_MEM_TYPE_RODATA, > + MOD_MEM_TYPE_RO_AFTER_INIT, > + MOD_MEM_TYPE_INIT_TEXT, > + MOD_MEM_TYPE_INIT_DATA, > + MOD_MEM_TYPE_INIT_RODATA, > + > + MOD_MEM_NUM_TYPES, > + MOD_MEM_TYPE_INVALID = -1, > +}; Ok, so we agreed to keep it as a table with enums. Fair enough. However, can we try to make it less ugly and more readable ? I don't thing the enums needs to be prefixed by MOD_MEM_TYPE_ Would be enough with MOD_TEXT, MOD_DATA, MOD_RODATA, MOD_RO_AFTER_INIT, MOD_INIT_TEXT, MOD_INIT_DATA, MOD_INIT_RODATA, MOD_INVALID. > + > +struct module_memory { > void *base; > - /* Total size. */ > unsigned int size; > - /* The size of the executable code. */ > - unsigned int text_size; > - /* Size of RO section of the module (text+rodata) */ > - unsigned int ro_size; > - /* Size of RO after init section */ > - unsigned int ro_after_init_size; > > #ifdef CONFIG_MODULES_TREE_LOOKUP > struct mod_tree_node mtn; > @@ -339,9 +344,9 @@ struct module_layout { > > #ifdef CONFIG_MODULES_TREE_LOOKUP > /* Only touch one cacheline for common rbtree-for-core-layout case. */ > -#define __module_layout_align ____cacheline_aligned > +#define __module_memory_align ____cacheline_aligned > #else > -#define __module_layout_align > +#define __module_memory_align > #endif > > struct mod_kallsyms { > @@ -418,12 +423,8 @@ struct module { > /* Startup function. */ > int (*init)(void); > > - /* Core layout: rbtree is accessed frequently, so keep together. */ > - struct module_layout core_layout __module_layout_align; > - struct module_layout init_layout; > -#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > - struct module_layout data_layout; > -#endif > + /* rbtree is accessed frequently, so keep together. */ > + struct module_memory mod_mem[MOD_MEM_NUM_TYPES] __module_memory_align; We are already in a struct called module, so the module_memory struct could be called mem[MOD_MEM_NUM_TYPES] > > /* Arch-specific module values */ > struct mod_arch_specific arch; > @@ -573,23 +574,35 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); > bool is_module_percpu_address(unsigned long addr); > bool is_module_text_address(unsigned long addr); > > +static inline bool within_module_mem_type(unsigned long addr, > + const struct module *mod, > + enum mod_mem_type type) > +{ > + const struct module_memory *mod_mem; > + > + if (WARN_ON_ONCE(type < MOD_MEM_TYPE_TEXT || type >= MOD_MEM_NUM_TYPES)) Here I would rather use 0 instead of MOD_MEM_TYPE_TEXT because MOD_MEM_TYPE_TEXT may change in the future. > + return false; > + > + mod_mem = &mod->mod_mem[type]; I can't see the added value of the mod_ prefix. Would read better as mem = &mod->mem[type]; return (unsigned long)mem->base <= addr && addr < (unsigned long)mem->base + mem->size; And could be even more readable as: unsigned long base, size; base = (unsigned long)mod->mod_mem[type].base; size = mod->mod_mem[type].size; return base <= addr && addr < base + size; > + return (unsigned long)mod_mem->base <= addr && > + addr < (unsigned long)mod_mem->base + mod_mem->size; > +} > + > static inline bool within_module_core(unsigned long addr, > const struct module *mod) > { > -#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > - if ((unsigned long)mod->data_layout.base <= addr && > - addr < (unsigned long)mod->data_layout.base + mod->data_layout.size) > - return true; > -#endif > - return (unsigned long)mod->core_layout.base <= addr && > - addr < (unsigned long)mod->core_layout.base + mod->core_layout.size; > + return within_module_mem_type(addr, mod, MOD_MEM_TYPE_TEXT) || > + within_module_mem_type(addr, mod, MOD_MEM_TYPE_DATA) || > + within_module_mem_type(addr, mod, MOD_MEM_TYPE_RODATA) || > + within_module_mem_type(addr, mod, MOD_MEM_TYPE_RO_AFTER_INIT); > } > > static inline bool within_module_init(unsigned long addr, > const struct module *mod) > { > - return (unsigned long)mod->init_layout.base <= addr && > - addr < (unsigned long)mod->init_layout.base + mod->init_layout.size; > + return within_module_mem_type(addr, mod, MOD_MEM_TYPE_INIT_TEXT) || > + within_module_mem_type(addr, mod, MOD_MEM_TYPE_INIT_DATA) || > + within_module_mem_type(addr, mod, MOD_MEM_TYPE_INIT_RODATA); > } > > static inline bool within_module(unsigned long addr, const struct module *mod) > diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c > index 4523f99b0358..9ed233de312a 100644 > --- a/kernel/module/kallsyms.c > +++ b/kernel/module/kallsyms.c > @@ -113,11 +117,13 @@ void layout_symtab(struct module *mod, struct load_info *info) > Elf_Shdr *strsect = info->sechdrs + info->index.str; > const Elf_Sym *src; > unsigned int i, nsrc, ndst, strtab_size = 0; > + struct module_memory *mod_mem_data = &mod->mod_mem[MOD_MEM_TYPE_DATA]; > + struct module_memory *mod_mem_init_data = &mod->mod_mem[MOD_MEM_TYPE_INIT_DATA]; One more exemple of something that would be more readable as: struct module_memory *mod_data = &mod->mem[MOD_DATA]; struct module_memory *mod_init_data = &mod->mod[MOD_INIT_DATA]; > > /* Put symbol section at end of init part of module. */ > symsect->sh_flags |= SHF_ALLOC; > - symsect->sh_entsize = module_get_offset(mod, &mod->init_layout.size, symsect, > - info->index.sym) | INIT_OFFSET_MASK; > + symsect->sh_entsize = module_get_offset_and_type(mod, MOD_MEM_TYPE_INIT_DATA, > + symsect, info->index.sym); > pr_debug("\t%s\n", info->secstrings + symsect->sh_name); > > src = (void *)info->hdr + symsect->sh_offset; > diff --git a/kernel/module/main.c b/kernel/module/main.c > index d3be89de706d..32f63c6eaa61 100644 > --- a/kernel/module/main.c > +++ b/kernel/module/main.c > @@ -923,11 +930,30 @@ static ssize_t store_uevent(struct module_attribute *mattr, > struct module_attribute module_uevent = > __ATTR(uevent, 0200, NULL, store_uevent); > > +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > + > +static ssize_t show_coresize(struct module_attribute *mattr, > + struct module_kobject *mk, char *buffer) > +{ > + unsigned int size = 0; size is not used > + > + return sprintf(buffer, "%u\n", > + mk->mod->mod_mem[MOD_MEM_TYPE_TEXT].size); Wouldn't it read better as: return sprintf(buffer, "%u\n", mk->mod->mem[MOD_TEXT].size); > +} > + > +#else > + > static ssize_t show_coresize(struct module_attribute *mattr, > struct module_kobject *mk, char *buffer) > { > - return sprintf(buffer, "%u\n", mk->mod->core_layout.size); > + enum mod_mem_type type; > + unsigned int size = 0; > + > + for_core_mod_mem_type(type) > + size += mk->mod->mod_mem[type].size; > + return sprintf(buffer, "%u\n", size); > } > +#endif > > static struct module_attribute modinfo_coresize = > __ATTR(coresize, 0444, show_coresize, NULL); Christophe