linux-kernel@xxxxxxxxxxxxxxx, Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>,
	Uladzislau Rezki <urezki@xxxxxxxxx>, Christoph Hellwig <hch@xxxxxxxxxxxxx>,
	Lorenzo Stoakes <lstoakes@xxxxxxxxx>
From: Maxwell Bland <mbland@xxxxxxxxxxxx>
Date: Tue, 2 Apr 2024 15:15:01 -0500
Subject: [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc

Make red-black tree allocation more flexible on a per-architecture
basis by introducing optional hooks for refining the red-black tree
structure and by exposing the vmalloc functions for clipping, finding,
and inserting vmap areas.

With this patch, the red-black vmap tree can be refined to account for
architecture-specific memory management features, most notably address
space layout randomization (ASLR), since such features conflict with
mm/vmalloc.c's generic management of a single vmalloc_start to
vmalloc_end range. For example, x86 is forced to restrict ASLR to only
1024 possible locations, and arm64 abandons standard code/data
partitioning altogether, which prevents enforcing performant
immutability of kernel page tables.

Signed-off-by: Maxwell Bland <mbland@xxxxxxxxxxxx>
---
 include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
 mm/vmalloc.c            | 16 ++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)
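As a sketch of the intended use (illustration only, not part of this
patch), an architecture could override both stubs from its
asm/vmalloc.h. The CODE_VMALLOC_START/CODE_VMALLOC_END bounds below are
made up for the example; the hooks are declared out of line so their
bodies can dereference struct vmap_area, which is still incomplete at
the point linux/vmalloc.h includes asm/vmalloc.h:

/* hypothetical arch/<arch>/include/asm/vmalloc.h additions */
struct vmap_area;
struct kmem_cache;

/* made-up bounds of a dedicated executable region of vmalloc space */
#define CODE_VMALLOC_START	VMALLOC_START
#define CODE_VMALLOC_END	(VMALLOC_START + SZ_512M)

#define arch_skip_va arch_skip_va
bool arch_skip_va(struct vmap_area *va, unsigned long vstart);

#define arch_refine_vmap_space arch_refine_vmap_space
void arch_refine_vmap_space(struct rb_root *root, struct list_head *head,
			    struct kmem_cache *cachep);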
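The corresponding definitions could then use only the helpers this
patch exports. A real port would likely also apply the reverse check,
keeping data allocations out of the code region, and would handle
va_clip() failure; both are elided here:

/* hypothetical arch/<arch>/mm/vmalloc.c */
#include <linux/vmalloc.h>

bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
{
	/*
	 * Treat a vstart inside the code region as a request for
	 * executable memory: skip any free area lying entirely outside
	 * that region so code and data never intermix.
	 */
	if (vstart >= CODE_VMALLOC_START && vstart < CODE_VMALLOC_END)
		return va->va_end <= CODE_VMALLOC_START ||
		       va->va_start >= CODE_VMALLOC_END;
	return false;
}

/* cachep is available for archs that must allocate extra vmap_area
 * nodes, e.g. to re-insert ranges via insert_vmap_area_augment(). */
void arch_refine_vmap_space(struct rb_root *root, struct list_head *head,
			    struct kmem_cache *cachep)
{
	/* Split the code region out of whichever free area covers it. */
	struct vmap_area *va = __find_vmap_area(CODE_VMALLOC_START, root);

	if (va)
		va_clip(root, head, va, CODE_VMALLOC_START,
			CODE_VMALLOC_END - CODE_VMALLOC_START);
}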
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 98ea90e90439..3c5ce7ee0bea 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -12,6 +12,7 @@
 
 #include <asm/vmalloc.h>
 
+struct kmem_cache;
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 struct notifier_block;		/* in notifier.h */
 struct iov_iter;		/* in uio.h */
@@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
 }
 #endif
 
+#ifndef arch_skip_va
+static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
+{
+	return false;
+}
+#endif
+
+#ifndef arch_refine_vmap_space
+static inline void arch_refine_vmap_space(struct rb_root *root,
+					  struct list_head *head,
+					  struct kmem_cache *cachep)
+{
+}
+#endif
+
 /*
  * Highlevel APIs for driver use
  */
@@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
 void free_vm_area(struct vm_struct *area);
 extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
+extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
+				     struct rb_root *root,
+				     struct list_head *head);
+extern int va_clip(struct rb_root *root, struct list_head *head,
+		   struct vmap_area *va, unsigned long nva_start_addr,
+		   unsigned long size);
+extern struct vmap_area *__find_vmap_area(unsigned long addr,
+					  struct rb_root *root);
 struct vmap_area *find_vmap_area(unsigned long addr);
 
 static inline bool is_vm_area_hugepages(const void *addr)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 68fa001648cc..de4577a3708e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
 	return atomic_long_read(&nr_vmalloc_pages);
 }
 
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
+struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
 {
 	struct rb_node *n = root->rb_node;
 
@@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
 		link_va(va, root, parent, link, head);
 }
 
-static void
+void
 insert_vmap_area_augment(struct vmap_area *va,
 	struct rb_node *from, struct rb_root *root,
 	struct list_head *head)
@@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
 				vstart < va->va_start) {
 			node = node->rb_left;
 		} else {
-			if (is_within_this_va(va, size, align, vstart))
+			if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
 				return va;
 
 			/*
@@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
 	 */
 	while ((node = rb_parent(node))) {
 		va = rb_entry(node, struct vmap_area, rb_node);
-		if (is_within_this_va(va, size, align, vstart))
+		if (!arch_skip_va(va, vstart) &&
+		    is_within_this_va(va, size, align, vstart))
 			return va;
 
 		if (get_subtree_max_size(node->rb_right) >= length &&
@@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
 	struct vmap_area *va;
 
 	list_for_each_entry(va, head, list) {
-		if (!is_within_this_va(va, size, align, vstart))
+		if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
 			continue;
 
 		return va;
@@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
 	return type;
 }
 
-static __always_inline int
+__always_inline int
 va_clip(struct rb_root *root, struct list_head *head,
 		struct vmap_area *va, unsigned long nva_start_addr,
 		unsigned long size)
@@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
 	vmap_node_shrinker->count_objects = vmap_node_shrink_count;
 	vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
 	shrinker_register(vmap_node_shrinker);
+
+	arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
+			       vmap_area_cachep);
 }
-- 
2.39.2