On Tue, Oct 15, 2024 at 5:19 AM 'Mike Rapoport' via kernel-team
<kernel-team@xxxxxxxxxxx> wrote:
>
> On Mon, Oct 14, 2024 at 01:36:44PM -0700, Suren Baghdasaryan wrote:
> > The memory reserved for module tags does not need to be backed by
> > physical pages until there are tags to store there. Change the way
> > we reserve this memory to allocate only virtual area for the tags
> > and populate it with physical pages as needed when we load a module.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
> > ---
> >  include/linux/execmem.h | 11 ++++++
> >  include/linux/vmalloc.h |  9 +++++
> >  lib/alloc_tag.c         | 84 +++++++++++++++++++++++++++++++++--------
> >  mm/execmem.c            | 16 ++++++++
> >  mm/vmalloc.c            |  4 +-
> >  5 files changed, 106 insertions(+), 18 deletions(-)
> >
> > diff --git a/include/linux/execmem.h b/include/linux/execmem.h
> > index 7436aa547818..a159a073270a 100644
> > --- a/include/linux/execmem.h
> > +++ b/include/linux/execmem.h
> > @@ -127,6 +127,17 @@ void *execmem_alloc(enum execmem_type type, size_t size);
> >   */
> >  void execmem_free(void *ptr);
> >
> > +/**
> > + * execmem_vmap - create virtual mapping for executable memory
> > + * @type: type of the allocation
> > + * @size: size of the virtual mapping in bytes
> > + *
> > + * Maps virtually contiguous area that can be populated with executable code.
> > + *
> > + * Return: the area descriptor on success or %NULL on failure.
> > + */
> > +struct vm_struct *execmem_vmap(enum execmem_type type, size_t size);
> > +
>
> I think it's better to limit it to EXECMEM_MODULE_DATA.

Ack.

>
> >  /**
> >   * execmem_update_copy - copy an update to executable memory
> >   * @dst: destination address to update
> > diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> > index 9a012cd4fad2..9d64cc6f24d1 100644
> > --- a/include/linux/vmalloc.h
> > +++ b/include/linux/vmalloc.h
> > @@ -202,6 +202,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
> >  extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
> >  					unsigned long pgoff);
> >
> > +int vmap_pages_range(unsigned long addr, unsigned long end,
> > +		     pgprot_t prot, struct page **pages, unsigned int page_shift);
> > +
> >
> >  /*
> >   * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
> >   * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
> > @@ -239,6 +242,12 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
> >  					      unsigned long flags,
> >  					      unsigned long start, unsigned long end,
> >  					      const void *caller);
> > +struct vm_struct *__get_vm_area_node(unsigned long size,
> > +				     unsigned long align, unsigned long shift,
> > +				     unsigned long flags, unsigned long start,
> > +				     unsigned long end, int node, gfp_t gfp_mask,
> > +				     const void *caller);
> > +
>
> This is not used outside mm/, let's put it into mm/internal.h.

Ack.
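
To make the first Ack above concrete: one way to limit execmem_vmap() to
EXECMEM_MODULE_DATA is to drop the @type argument entirely and hardcode the
range inside the helper. A rough sketch of what mm/execmem.c might gain,
assuming the execmem_info/execmem_range layout already in that file, with
fallback-range handling omitted for brevity (this is an illustration, not
necessarily what the respin will do):

struct vm_struct *execmem_vmap(size_t size)
{
	struct execmem_range *range = &execmem_info->ranges[EXECMEM_MODULE_DATA];

	/*
	 * Reserve virtual space only; the caller populates it with
	 * physical pages later via vmap_pages_range().
	 */
	return __get_vm_area_node(size, range->alignment, PAGE_SHIFT,
				  VM_NO_GUARD, range->start, range->end,
				  NUMA_NO_NODE, GFP_KERNEL,
				  __builtin_return_address(0));
}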
>
> >  void free_vm_area(struct vm_struct *area);
> >  extern struct vm_struct *remove_vm_area(const void *addr);
> >  extern struct vm_struct *find_vm_area(const void *addr);
> > diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> > index b10e7f17eeda..648f32d52b8d 100644
> > --- a/lib/alloc_tag.c
> > +++ b/lib/alloc_tag.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/proc_fs.h>
> >  #include <linux/seq_buf.h>
> >  #include <linux/seq_file.h>
> > +#include <linux/vmalloc.h>
> >
> >  static struct codetag_type *alloc_tag_cttype;
> >
> > @@ -153,6 +154,7 @@ static void __init procfs_init(void)
> >  #ifdef CONFIG_MODULES
> >
> >  static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
> > +static struct vm_struct *vm_module_tags;
> >  /* A dummy object used to indicate an unloaded module */
> >  static struct module unloaded_mod;
> >  /* A dummy object used to indicate a module prepended area */
> > @@ -195,6 +197,25 @@ static void clean_unused_module_areas_locked(void)
> >  	}
> >  }
> >
> > +static int vm_module_tags_grow(unsigned long addr, unsigned long bytes)
> > +{
> > +	struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
> > +	unsigned long more_pages = ALIGN(bytes, PAGE_SIZE) >> PAGE_SHIFT;
> > +	unsigned long nr;
> > +
> > +	nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN,
> > +					 NUMA_NO_NODE, more_pages, next_page);
> > +	if (nr != more_pages)
> > +		return -ENOMEM;
> > +
> > +	vm_module_tags->nr_pages += nr;
> > +	if (vmap_pages_range(addr, addr + (nr << PAGE_SHIFT),
> > +			     PAGE_KERNEL, next_page, PAGE_SHIFT) < 0)
> > +		return -ENOMEM;
> > +
> > +	return 0;
> > +}
> > +
> >  static void *reserve_module_tags(struct module *mod, unsigned long size,
> >  				 unsigned int prepend, unsigned long align)
> >  {
> > @@ -202,7 +223,7 @@ static void *reserve_module_tags(struct module *mod, unsigned long size,
> >  	MA_STATE(mas, &mod_area_mt, 0, section_size - 1);
> >  	bool cleanup_done = false;
> >  	unsigned long offset;
> > -	void *ret;
> > +	void *ret = NULL;
> >
> >  	/* If no tags return NULL */
> >  	if (size < sizeof(struct alloc_tag))
> > @@ -239,7 +260,7 @@ static void *reserve_module_tags(struct module *mod, unsigned long size,
> >  			goto repeat;
> >  		} else {
> >  			ret = ERR_PTR(-ENOMEM);
> > -			goto out;
> > +			goto unlock;
> >  		}
> >
> >  found:
> > @@ -254,7 +275,7 @@ static void *reserve_module_tags(struct module *mod, unsigned long size,
> >  		mas_store(&mas, &prepend_mod);
> >  		if (mas_is_err(&mas)) {
> >  			ret = ERR_PTR(xa_err(mas.node));
> > -			goto out;
> > +			goto unlock;
> >  		}
> >  		mas.index = offset;
> >  		mas.last = offset + size - 1;
> > @@ -263,7 +284,7 @@ static void *reserve_module_tags(struct module *mod, unsigned long size,
> >  			ret = ERR_PTR(xa_err(mas.node));
> >  			mas.index = pad_start;
> >  			mas_erase(&mas);
> > -			goto out;
> > +			goto unlock;
> >  		}
> >
> >  	} else {
> > @@ -271,18 +292,33 @@ static void *reserve_module_tags(struct module *mod, unsigned long size,
> >  		mas_store(&mas, mod);
> >  		if (mas_is_err(&mas)) {
> >  			ret = ERR_PTR(xa_err(mas.node));
> > -			goto out;
> > +			goto unlock;
> >  		}
> >  	}
> > +unlock:
> > +	mas_unlock(&mas);
> > +	if (IS_ERR(ret))
> > +		return ret;
> >
> > -	if (module_tags.size < offset + size)
> > -		module_tags.size = offset + size;
> > +	if (module_tags.size < offset + size) {
> > +		unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT;
> >
> > -	ret = (struct alloc_tag *)(module_tags.start_addr + offset);
> > -out:
> > -	mas_unlock(&mas);
> > +		module_tags.size = offset + size;
> > +		if (phys_size < module_tags.size) {
> > +			int grow_res;
> > +
> > +			grow_res = vm_module_tags_grow(module_tags.start_addr + phys_size,
> > +						       module_tags.size - phys_size);
> > +			if (grow_res) {
> > +				static_branch_disable(&mem_alloc_profiling_key);
> > +				pr_warn("Failed to allocate tags memory for module %s. Memory profiling is disabled!\n",
> > +					mod->name);
> > +				return ERR_PTR(grow_res);
> > +			}
> > +		}
> > +	}
>
> The diff for reserve_module_tags() is hard to read, and the function itself
> becomes really complex to follow with all the gotos back and forth.
> Maybe it's possible to split out some parts of it as helpers?

Got it. Will refactor this function to make it easier to review.
Thanks for the prompt review, Mike!

> > -	return ret;
> > +	return (struct alloc_tag *)(module_tags.start_addr + offset);
> >  }
> >
>
> --
> Sincerely yours,
> Mike.
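
P.S. For illustration only, one possible shape for that refactor: move the
mas_lock()ed maple-tree work into one helper and the physical-backing growth
into another, so reserve_module_tags() itself reads top to bottom with no
gotos. The helper names below are placeholders, not what the respin will
necessarily use:

static void *reserve_module_tags(struct module *mod, unsigned long size,
				 unsigned int prepend, unsigned long align)
{
	unsigned long offset;
	int err;

	/* If no tags return NULL */
	if (size < sizeof(struct alloc_tag))
		return NULL;

	/*
	 * Placeholder helper: performs the locked search and store,
	 * keeps the repeat/found gotos internal, and returns the
	 * reserved offset through @offset (or a -errno).
	 */
	err = find_and_store_mod_area(mod, size, prepend, align, &offset);
	if (err)
		return ERR_PTR(err);

	/*
	 * Placeholder helper: extends module_tags.size and calls
	 * vm_module_tags_grow() for any still-unbacked range, outside
	 * the maple tree lock.
	 */
	err = ensure_module_tags_backed(offset + size);
	if (err) {
		static_branch_disable(&mem_alloc_profiling_key);
		pr_warn("Failed to allocate tags memory for module %s. Memory profiling is disabled!\n",
			mod->name);
		return ERR_PTR(err);
	}

	return (struct alloc_tag *)(module_tags.start_addr + offset);
}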