Hi Mike, On Mon, 9 Sept 2024 at 08:51, Mike Rapoport <rppt@xxxxxxxxxx> wrote: > > From: "Mike Rapoport (Microsoft)" <rppt@xxxxxxxxxx> > > Using large pages to map text areas reduces iTLB pressure and improves > performance. > > Extend execmem_alloc() with an ability to use huge pages with ROX > permissions as a cache for smaller allocations. > > To populate the cache, a writable large page is allocated from vmalloc with > VM_ALLOW_HUGE_VMAP, filled with invalid instructions and then remapped as > ROX. > > Portions of that large page are handed out to execmem_alloc() callers > without any changes to the permissions. > > When the memory is freed with execmem_free() it is invalidated again so > that it won't contain stale instructions. > > The cache is enabled when an architecture sets EXECMEM_ROX_CACHE flag in > definition of an execmem_range. > > Signed-off-by: Mike Rapoport (Microsoft) <rppt@xxxxxxxxxx> > --- > include/linux/execmem.h | 2 + > mm/execmem.c | 289 +++++++++++++++++++++++++++++++++++++++- > 2 files changed, 286 insertions(+), 5 deletions(-) > > diff --git a/include/linux/execmem.h b/include/linux/execmem.h > index dfdf19f8a5e8..7436aa547818 100644 > --- a/include/linux/execmem.h > +++ b/include/linux/execmem.h > @@ -77,12 +77,14 @@ struct execmem_range { > > /** > * struct execmem_info - architecture parameters for code allocations > + * @fill_trapping_insns: set memory to contain instructions that will trap > * @ranges: array of parameter sets defining architecture specific > * parameters for executable memory allocations. The ranges that are not > * explicitly initialized by an architecture use parameters defined for > * @EXECMEM_DEFAULT. 
> */ > struct execmem_info { > + void (*fill_trapping_insns)(void *ptr, size_t size, bool writable); > struct execmem_range ranges[EXECMEM_TYPE_MAX]; > }; > > diff --git a/mm/execmem.c b/mm/execmem.c > index 0f6691e9ffe6..f547c1f3c93d 100644 > --- a/mm/execmem.c > +++ b/mm/execmem.c > @@ -7,28 +7,88 @@ > */ > > #include <linux/mm.h> > +#include <linux/mutex.h> > #include <linux/vmalloc.h> > #include <linux/execmem.h> > +#include <linux/maple_tree.h> > #include <linux/moduleloader.h> > #include <linux/text-patching.h> > > +#include <asm/tlbflush.h> > + > +#include "internal.h" > + > static struct execmem_info *execmem_info __ro_after_init; > static struct execmem_info default_execmem_info __ro_after_init; > > -static void *__execmem_alloc(struct execmem_range *range, size_t size) > +#ifdef CONFIG_MMU > +struct execmem_cache { > + struct mutex mutex; > + struct maple_tree busy_areas; > + struct maple_tree free_areas; > +}; > + > +static struct execmem_cache execmem_cache = { > + .mutex = __MUTEX_INITIALIZER(execmem_cache.mutex), > + .busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN, > + execmem_cache.mutex), > + .free_areas = MTREE_INIT_EXT(free_areas, MT_FLAGS_LOCK_EXTERN, > + execmem_cache.mutex), > +}; > + > +static void execmem_cache_clean(struct work_struct *work) > +{ > + struct maple_tree *free_areas = &execmem_cache.free_areas; > + struct mutex *mutex = &execmem_cache.mutex; > + MA_STATE(mas, free_areas, 0, ULONG_MAX); > + void *area; > + > + mutex_lock(mutex); > + mas_for_each(&mas, area, ULONG_MAX) { > + size_t size; > + > + if (!xa_is_value(area)) > + continue; > + > + size = xa_to_value(area); > + > + if (IS_ALIGNED(size, PMD_SIZE) && > + IS_ALIGNED(mas.index, PMD_SIZE)) { > + void *ptr = (void *)mas.index; > + > + mas_erase(&mas); > + vfree(ptr); > + } > + } > + mutex_unlock(mutex); > +} > + > +static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean); > + > +static void execmem_fill_trapping_insns(void *ptr, size_t size, bool 
writable)
> +{
> +	if (execmem_info->fill_trapping_insns)
> +		execmem_info->fill_trapping_insns(ptr, size, writable);
> +	else
> +		memset(ptr, 0, size);

Does this really have to be a function pointer with a runtime check? This could just be a __weak definition, with the arch providing an override if the memset() is not appropriate.