Re: [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Apr 02, 2024 at 03:15:01PM -0500, Maxwell Bland wrote:
> Makes red-black tree allocation more flexible on a per-architecture
> basis by introducing optional hooks to refine the red-black tree
> structuring and exposing vmalloc functions for clipping vmap areas,
> finding vmap areas, and inserting vmap areas.
> 
> With this patch, the red-black vmap tree can be refined to account for
> architecture-specific memory management operations, most notably address
> space layout randomization, as these features conflict with generic
> management of a single vmalloc_start to vmalloc_end range as given by
> mm/vmalloc.c.
> 
> For example, x86 is forced to restrict aslr to 1024 possible locations,
> which is a very, very small number, and arm64 breaks standard code/data
> partitioning altogether, which prevents the enforcement of performant
> immutability on kernel page tables.
> 
> Signed-off-by: Maxwell Bland <mbland@xxxxxxxxxxxx>
> ---
>  include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
>  mm/vmalloc.c            | 16 ++++++++++------
>  2 files changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 98ea90e90439..3c5ce7ee0bea 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -12,6 +12,7 @@
>  
>  #include <asm/vmalloc.h>
>  
> +struct kmem_cache;
>  struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
>  struct notifier_block;		/* in notifier.h */
>  struct iov_iter;		/* in uio.h */
> @@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
>  }
>  #endif
>  
> +#ifndef arch_skip_va
> +static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
> +{
> +	return false;
> +}
> +#endif
> +
> +#ifndef arch_refine_vmap_space
> +static inline void arch_refine_vmap_space(struct rb_root *root,
> +					  struct list_head *head,
> +					  struct kmem_cache *cachep)
> +{
> +}
> +#endif
> +
>  /*
>   *	Highlevel APIs for driver use
>   */
> @@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
>  void free_vm_area(struct vm_struct *area);
>  extern struct vm_struct *remove_vm_area(const void *addr);
>  extern struct vm_struct *find_vm_area(const void *addr);
> +extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
> +				     struct rb_root *root,
> +				     struct list_head *head);
> +extern int va_clip(struct rb_root *root, struct list_head *head,
> +		   struct vmap_area *va, unsigned long nva_start_addr,
> +		   unsigned long size);
> +extern struct vmap_area *__find_vmap_area(unsigned long addr,
> +					  struct rb_root *root);
>
To me it looks like you want to make internal functions public for
everyone, which is not good, imho.

>  struct vmap_area *find_vmap_area(unsigned long addr);
>  
>  static inline bool is_vm_area_hugepages(const void *addr)
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 68fa001648cc..de4577a3708e 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
>  	return atomic_long_read(&nr_vmalloc_pages);
>  }
>  
> -static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
> +struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
>  {
>  	struct rb_node *n = root->rb_node;
>  
> @@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
>  		link_va(va, root, parent, link, head);
>  }
>  
> -static void
> +void
>  insert_vmap_area_augment(struct vmap_area *va,
>  	struct rb_node *from, struct rb_root *root,
>  	struct list_head *head)
> @@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
>  				vstart < va->va_start) {
>  			node = node->rb_left;
>  		} else {
> -			if (is_within_this_va(va, size, align, vstart))
> +			if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
>  				return va;
>  
>  			/*
> @@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
>  			 */
>  			while ((node = rb_parent(node))) {
>  				va = rb_entry(node, struct vmap_area, rb_node);
> -				if (is_within_this_va(va, size, align, vstart))
> +				if (!arch_skip_va(va, vstart) &&
> +				    is_within_this_va(va, size, align, vstart))
>  					return va;
>  
>  				if (get_subtree_max_size(node->rb_right) >= length &&
> @@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
>  	struct vmap_area *va;
>  
>  	list_for_each_entry(va, head, list) {
> -		if (!is_within_this_va(va, size, align, vstart))
> +		if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
>  			continue;
>  
Injecting arch_skip_va() into the search algorithm sounds like a hack
and might lead (if I am not missing something; I need to check more
closely) to alloc failures when the search goes toward a reserved VA
that we are not allowed to allocate from.

>  		return va;
> @@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
>  	return type;
>  }
>  
> -static __always_inline int
> +__always_inline int
>  va_clip(struct rb_root *root, struct list_head *head,
>  		struct vmap_area *va, unsigned long nva_start_addr,
>  		unsigned long size)
> @@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
>  	vmap_node_shrinker->count_objects = vmap_node_shrink_count;
>  	vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
>  	shrinker_register(vmap_node_shrinker);
> +
> +	arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
> +			       vmap_area_cachep);
>  }
>
Why don't you just allocate using a specific range, from
MODULES_ASLR_START to VMALLOC_END?

Thanks!

--
Uladzislau Rezki




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux