* Suren Baghdasaryan <surenb@xxxxxxxxxx> [241226 12:07]: > To enable SLAB_TYPESAFE_BY_RCU for vma cache we need to ensure that > object reuse before RCU grace period is over will be detected by > lock_vma_under_rcu(). Current checks are sufficient as long as vma > is detached before it is freed. Implement this guarantee by calling > vma_ensure_detached() before vma is freed and make vm_area_cachep > SLAB_TYPESAFE_BY_RCU. This will facilitate vm_area_struct reuse and > will minimize the number of call_rcu() calls. > > Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx> Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> > --- > include/linux/mm.h | 2 -- > include/linux/mm_types.h | 10 +++++++--- > include/linux/slab.h | 6 ------ > kernel/fork.c | 31 +++++++++---------------------- > mm/mmap.c | 3 ++- > mm/vma.c | 10 +++------- > mm/vma.h | 2 +- > tools/testing/vma/vma_internal.h | 7 +------ > 8 files changed, 23 insertions(+), 48 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 56a7d70ca5bd..017d70e1d432 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -258,8 +258,6 @@ void setup_initial_init_mm(void *start_code, void *end_code, > struct vm_area_struct *vm_area_alloc(struct mm_struct *); > struct vm_area_struct *vm_area_dup(struct vm_area_struct *); > void vm_area_free(struct vm_area_struct *); > -/* Use only if VMA has no other users */ > -void __vm_area_free(struct vm_area_struct *vma); > > #ifndef CONFIG_MMU > extern struct rb_root nommu_region_tree; > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index b5312421dec6..3ca4695f6d0f 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -574,6 +574,12 @@ static inline void *folio_get_private(struct folio *folio) > > typedef unsigned long vm_flags_t; > > +/* > + * freeptr_t represents a SLUB freelist pointer, which might be encoded > + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. > + */ > +typedef struct { unsigned long v; } freeptr_t; > + > /* > * A region containing a mapping of a non-memory backed file under NOMMU > * conditions. These are held in a global tree and are pinned by the VMAs that > @@ -687,9 +693,7 @@ struct vm_area_struct { > unsigned long vm_start; > unsigned long vm_end; > }; > -#ifdef CONFIG_PER_VMA_LOCK > - struct rcu_head vm_rcu; /* Used for deferred freeing. */ > -#endif > + freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ > }; > > /* > diff --git a/include/linux/slab.h b/include/linux/slab.h > index 10a971c2bde3..681b685b6c4e 100644 > --- a/include/linux/slab.h > +++ b/include/linux/slab.h > @@ -234,12 +234,6 @@ enum _slab_flag_bits { > #define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED > #endif > > -/* > - * freeptr_t represents a SLUB freelist pointer, which might be encoded > - * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. > - */ > -typedef struct { unsigned long v; } freeptr_t; > - > /* > * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. > * > diff --git a/kernel/fork.c b/kernel/fork.c > index 7a0800d48112..da3b1ebfd282 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -471,7 +471,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) > return new; > } > > -void __vm_area_free(struct vm_area_struct *vma) > +void vm_area_free(struct vm_area_struct *vma) > { > /* The vma should be detached while being destroyed. */ > vma_assert_detached(vma); > @@ -480,25 +480,6 @@ void __vm_area_free(struct vm_area_struct *vma) > kmem_cache_free(vm_area_cachep, vma); > } > > -#ifdef CONFIG_PER_VMA_LOCK > -static void vm_area_free_rcu_cb(struct rcu_head *head) > -{ > - struct vm_area_struct *vma = container_of(head, struct vm_area_struct, > - vm_rcu); > - > - __vm_area_free(vma); > -} > -#endif > - > -void vm_area_free(struct vm_area_struct *vma) > -{ > -#ifdef CONFIG_PER_VMA_LOCK > - call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); > -#else > - __vm_area_free(vma); > -#endif > -} > - > static void account_kernel_stack(struct task_struct *tsk, int account) > { > if (IS_ENABLED(CONFIG_VMAP_STACK)) { > @@ -3144,6 +3125,11 @@ void __init mm_cache_init(void) > > void __init proc_caches_init(void) > { > + struct kmem_cache_args args = { > + .use_freeptr_offset = true, > + .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr), > + }; > + > sighand_cachep = kmem_cache_create("sighand_cache", > sizeof(struct sighand_struct), 0, > SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| > @@ -3160,8 +3146,9 @@ void __init proc_caches_init(void) > sizeof(struct fs_struct), 0, > SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, > NULL); > - vm_area_cachep = KMEM_CACHE(vm_area_struct, > - SLAB_HWCACHE_ALIGN|SLAB_NO_MERGE|SLAB_PANIC| > + vm_area_cachep = kmem_cache_create("vm_area_struct", > + sizeof(struct vm_area_struct), &args, > + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| > SLAB_ACCOUNT); > mmap_init(); > nsproxy_cache_init(); > diff --git a/mm/mmap.c b/mm/mmap.c > index 3cc8de07411d..7fdc4207fe98 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1364,7 +1364,8 @@ void exit_mmap(struct mm_struct *mm) > do { > if (vma->vm_flags & VM_ACCOUNT) > nr_accounted += vma_pages(vma); > - remove_vma(vma, /* unreachable = */ true); > + vma_mark_detached(vma); > + remove_vma(vma); > count++; > cond_resched(); > vma = vma_next(&vmi); > diff --git a/mm/vma.c b/mm/vma.c > index 4a3deb6f9662..e37eb384d118 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -406,18 +406,14 @@ static bool can_vma_merge_right(struct vma_merge_struct *vmg, > /* > * Close a vm structure and free it. > */ > -void remove_vma(struct vm_area_struct *vma, bool unreachable) > +void remove_vma(struct vm_area_struct *vma) > { > might_sleep(); > vma_close(vma); > if (vma->vm_file) > fput(vma->vm_file); > mpol_put(vma_policy(vma)); > - if (unreachable) { > - vma_mark_detached(vma); > - __vm_area_free(vma); > - } else > - vm_area_free(vma); > + vm_area_free(vma); > } > > /* > @@ -1199,7 +1195,7 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, > /* Remove and clean up vmas */ > mas_set(mas_detach, 0); > mas_for_each(mas_detach, vma, ULONG_MAX) > - remove_vma(vma, /* unreachable = */ false); > + remove_vma(vma); > > vm_unacct_memory(vms->nr_accounted); > validate_mm(mm); > diff --git a/mm/vma.h b/mm/vma.h > index 18c9e49b1eae..d6803626151d 100644 > --- a/mm/vma.h > +++ b/mm/vma.h > @@ -173,7 +173,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, > unsigned long start, size_t len, struct list_head *uf, > bool unlock); > > -void remove_vma(struct vm_area_struct *vma, bool unreachable); > +void remove_vma(struct vm_area_struct *vma); > > void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, > struct vm_area_struct *prev, struct vm_area_struct *next); > diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h > index 1e8cd2f013fa..c7c580ec9a2d 100644 > --- a/tools/testing/vma/vma_internal.h > +++ b/tools/testing/vma/vma_internal.h > @@ -693,14 +693,9 @@ static inline void mpol_put(struct mempolicy *) > { > } > > -static inline void __vm_area_free(struct vm_area_struct *vma) > -{ > - free(vma); > -} > - > static inline void vm_area_free(struct vm_area_struct *vma) > { > - __vm_area_free(vma); > + free(vma); > } > > static inline void lru_add_drain(void) > -- > 2.47.1.613.gc27f4b7a9f-goog >