On Tue, Apr 02, 2024 at 03:15:01PM -0500, Maxwell Bland wrote:
> Makes red-black tree allocation more flexible on a per-architecture
> basis by introducing optional hooks to refine the red-black tree
> structuring and exposing vmalloc functions for clipping vmap areas,
> finding vmap areas, and inserting vmap areas.
>
> With this patch, the red-black vmap tree can be refined to account for
> architecture-specific memory management operations, most notably address
> space layout randomization, as these features conflict with generic
> management of a single vmalloc_start to vmalloc_end range as given by
> mm/vmalloc.c.
>
> For example, x86 is forced to restrict aslr to 1024 possible locations,
> which is a very, very small number, and arm64 breaks standard code/data
> partitioning altogether, which prevents the enforcement of performant
> immutability on kernel page tables.
>
> Signed-off-by: Maxwell Bland <mbland@xxxxxxxxxxxx>
> ---
>  include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
>  mm/vmalloc.c            | 16 ++++++++++------
>  2 files changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 98ea90e90439..3c5ce7ee0bea 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -12,6 +12,7 @@
>
>  #include <asm/vmalloc.h>
>
> +struct kmem_cache;
>  struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
>  struct notifier_block;		/* in notifier.h */
>  struct iov_iter;		/* in uio.h */
> @@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
>  }
>  #endif
>
> +#ifndef arch_skip_va
> +static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
> +{
> +	return false;
> +}
> +#endif
> +
> +#ifndef arch_refine_vmap_space
> +static inline void arch_refine_vmap_space(struct rb_root *root,
> +					  struct list_head *head,
> +					  struct kmem_cache *cachep)
> +{
> +}
> +#endif
> +
>  /*
>   * Highlevel APIs for driver use
>   */
> @@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
>  void free_vm_area(struct vm_struct *area);
>  extern struct vm_struct *remove_vm_area(const void *addr);
>  extern struct vm_struct *find_vm_area(const void *addr);
> +extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
> +				     struct rb_root *root,
> +				     struct list_head *head);
> +extern int va_clip(struct rb_root *root, struct list_head *head,
> +		   struct vmap_area *va, unsigned long nva_start_addr,
> +		   unsigned long size);
> +extern struct vmap_area *__find_vmap_area(unsigned long addr,
> +					  struct rb_root *root);

To me it looks like you want to make internal functions public for everyone, which is not good, imho.
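
As for the hooks themselves: the #ifndef guards above mean an architecture is expected to provide its own definitions from its asm/vmalloc.h. A rough, hypothetical sketch of what an arch_skip_va override could look like (this is not taken from the series; kernel_code_region_start/end are made-up placeholders for whatever window an architecture might want to reserve):

	/*
	 * Hypothetical arch override, e.g. in <asm/vmalloc.h>; not part of
	 * the posted patch.  Defining a macro named arch_skip_va makes the
	 * #ifndef fallback in <linux/vmalloc.h> drop out, and a macro also
	 * avoids needing the full struct vmap_area definition this early in
	 * the include chain.
	 *
	 * kernel_code_region_start/end are made-up placeholders for a
	 * reserved window the architecture wants to keep ordinary vmalloc()
	 * searches out of.
	 */
	extern unsigned long kernel_code_region_start, kernel_code_region_end;

	/*
	 * Skip free areas inside the reserved window unless the search was
	 * explicitly started at or above the window start.
	 */
	#define arch_skip_va(va, vstart)				\
		((vstart) < kernel_code_region_start &&			\
		 (va)->va_start >= kernel_code_region_start &&		\
		 (va)->va_end <= kernel_code_region_end)

Whether that kind of filtering belongs in the common search path at all is a separate question, see below.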
>  struct vmap_area *find_vmap_area(unsigned long addr);
>
>  static inline bool is_vm_area_hugepages(const void *addr)
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 68fa001648cc..de4577a3708e 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
>  	return atomic_long_read(&nr_vmalloc_pages);
>  }
>
> -static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
> +struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
>  {
>  	struct rb_node *n = root->rb_node;
>
> @@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
>  	link_va(va, root, parent, link, head);
>  }
>
> -static void
> +void
>  insert_vmap_area_augment(struct vmap_area *va,
>  	struct rb_node *from, struct rb_root *root,
>  	struct list_head *head)
> @@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
>  				vstart < va->va_start) {
>  			node = node->rb_left;
>  		} else {
> -			if (is_within_this_va(va, size, align, vstart))
> +			if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
>  				return va;
>
>  			/*
> @@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
>  	 */
>  	while ((node = rb_parent(node))) {
>  		va = rb_entry(node, struct vmap_area, rb_node);
> -		if (is_within_this_va(va, size, align, vstart))
> +		if (!arch_skip_va(va, vstart) &&
> +		    is_within_this_va(va, size, align, vstart))
>  			return va;
>
>  		if (get_subtree_max_size(node->rb_right) >= length &&
> @@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
>  	struct vmap_area *va;
>
>  	list_for_each_entry(va, head, list) {
> -		if (!is_within_this_va(va, size, align, vstart))
> +		if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
>  			continue;
>
Injecting arch_skip_va() into the search algorithm sounds like a hack and (if I am not missing something; I need to check this closer) might lead to allocation failures when the search walks toward a reserved VA range that we are not allowed to allocate from.

>  		return va;
> @@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
>  	return type;
>  }
>
> -static __always_inline int
> +__always_inline int
>  va_clip(struct rb_root *root, struct list_head *head,
>  	struct vmap_area *va, unsigned long nva_start_addr,
>  	unsigned long size)
> @@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
>  	vmap_node_shrinker->count_objects = vmap_node_shrink_count;
>  	vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
>  	shrinker_register(vmap_node_shrinker);
> +
> +	arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
> +			       vmap_area_cachep);
>  }
>

Why do you not just allocate using a specific range, from MODULES_ASLR_START till VMALLOC_END?

Thanks!

--
Uladzislau Rezki
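
For reference, the alternative hinted at by that last question already has a natural entry point: __vmalloc_node_range() takes an explicit start/end pair, so a caller that must stay inside a dedicated window can pass those bounds directly instead of teaching the common free-area search to skip areas. A rough sketch (hypothetical; MODULES_ASLR_START here is simply the symbol used in the question, standing in for whatever bound the architecture exports for its randomized module region):

	#include <linux/gfp.h>
	#include <linux/numa.h>
	#include <linux/vmalloc.h>

	/*
	 * Hypothetical sketch only: pin the allocation to an explicit VA
	 * window instead of filtering the free-area search.
	 * MODULES_ASLR_START is a placeholder for an arch-provided bound.
	 */
	static void *alloc_within_window(unsigned long size)
	{
		return __vmalloc_node_range(size, PAGE_SIZE,
					    MODULES_ASLR_START, VMALLOC_END,
					    GFP_KERNEL, PAGE_KERNEL,
					    0, NUMA_NO_NODE,
					    __builtin_return_address(0));
	}

That keeps the restriction in the caller that cares about it, rather than in every walk of the free-area tree.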