To enable SLAB_TYPESAFE_BY_RCU for the vma cache we need to ensure that
object reuse before the RCU grace period is over will be detected by
lock_vma_under_rcu().

lock_vma_under_rcu() enters an RCU read section, finds the vma at the
given address, locks the vma and checks if it got detached or remapped
to cover a different address range. These last checks are there to
ensure that the vma was not modified after we found it but before
locking it. Object reuse introduces the possibility that between those
two events of finding and locking the vma, it gets detached, reused,
added into a tree and marked as attached. The current checks detect the
cases when:
- the vma was reused but not yet added into a tree (detached check)
- the vma was reused at a different address range (address check)

If the reused vma covers a new address range which still includes the
address we were looking for, that is not a problem unless it was added
into a different address space. Therefore checking that vma->vm_mm is
still the same is the only missing check needed to detect vma reuse.

Add this missing check into lock_vma_under_rcu() and change the vma
cache to use SLAB_TYPESAFE_BY_RCU. This facilitates vm_area_struct
reuse and minimizes the number of call_rcu() calls.

Placing vm_freeptr in the existing union with vm_start/vm_end avoids
bloating vm_area_struct. lock_vma_under_rcu()'s check of the detached
flag guarantees that the vma is valid and attached to a tree, therefore
unioning vm_freeptr with vm_start/vm_end is not an issue even though
lock_vma_under_rcu() uses them.

As part of this change the freeptr_t declaration is moved into
mm_types.h to avoid a circular dependency between mm_types.h and
slab.h.

Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
---
 include/linux/mm_types.h | 10 +++++++---
 include/linux/slab.h     |  6 ------
 kernel/fork.c            | 29 +++++++++++++----------------
 mm/memory.c              |  2 +-
 4 files changed, 21 insertions(+), 26 deletions(-)
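
For reference, after this change the lookup and validation sequence in
lock_vma_under_rcu() is roughly the following (a condensed sketch, not
the exact code: it folds the retry and failure paths into one branch
and drops the VMA_LOCK_* event counting; see mm/memory.c for the real
flow):

	MA_STATE(mas, &mm->mm_mt, address, address);
	struct vm_area_struct *vma;

	rcu_read_lock();
	vma = mas_walk(&mas);		/* find a candidate vma */
	if (!vma || !vma_start_read(vma))
		goto inval;
	/*
	 * The object may have been freed and reused between mas_walk()
	 * and vma_start_read(), but under SLAB_TYPESAFE_BY_RCU it is
	 * still a vm_area_struct, so it can be validated after locking.
	 */
	if (is_vma_detached(vma) ||	/* reused, not yet reattached */
	    vma->vm_mm != mm ||		/* reused in another mm (the new check) */
	    address < vma->vm_start ||	/* reused at a different range */
	    address >= vma->vm_end) {
		vma_end_read(vma);	/* the real code may retry the walk */
		goto inval;
	}
	rcu_read_unlock();
	return vma;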
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5c4bfdcfac72..37580cc7bec0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -32,6 +32,12 @@
 struct address_space;
 struct mem_cgroup;
 
+/*
+ * freeptr_t represents a SLUB freelist pointer, which might be encoded
+ * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
+ */
+typedef struct { unsigned long v; } freeptr_t;
+
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -673,9 +679,7 @@ struct vm_area_struct {
 			unsigned long vm_start;
 			unsigned long vm_end;
 		};
-#ifdef CONFIG_PER_VMA_LOCK
-		struct rcu_head vm_rcu;	/* Used for deferred freeing. */
-#endif
+		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
 	};
 
 /*
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b35e2db7eb0e..cb45db2402ac 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -212,12 +212,6 @@ enum _slab_flag_bits {
 #define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
 #endif
 
-/*
- * freeptr_t represents a SLUB freelist pointer, which might be encoded
- * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
- */
-typedef struct { unsigned long v; } freeptr_t;
-
 /*
  * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
  *
diff --git a/kernel/fork.c b/kernel/fork.c
index 7823797e31d2..946c3f9a9342 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -478,25 +478,15 @@ void __vm_area_free(struct vm_area_struct *vma)
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
-#ifdef CONFIG_PER_VMA_LOCK
-static void vm_area_free_rcu_cb(struct rcu_head *head)
+void vm_area_free(struct vm_area_struct *vma)
 {
-	struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
-						  vm_rcu);
-
+#ifdef CONFIG_PER_VMA_LOCK
+	/* The vma should be detached while being destroyed. */
+	VM_BUG_ON_VMA(!is_vma_detached(vma), vma);
 	/* The vma should not be locked while being destroyed. */
 	VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock.lock), vma);
-	__vm_area_free(vma);
-}
 #endif
-
-void vm_area_free(struct vm_area_struct *vma)
-{
-#ifdef CONFIG_PER_VMA_LOCK
-	call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb);
-#else
 	__vm_area_free(vma);
-#endif
 }
 
 static void account_kernel_stack(struct task_struct *tsk, int account)
@@ -3115,6 +3105,11 @@ void __init mm_cache_init(void)
 
 void __init proc_caches_init(void)
 {
+	struct kmem_cache_args args = {
+		.use_freeptr_offset = true,
+		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+	};
+
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3131,9 +3126,11 @@ void __init proc_caches_init(void)
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			NULL);
-	vm_area_cachep = KMEM_CACHE(vm_area_struct,
-			SLAB_HWCACHE_ALIGN|SLAB_NO_MERGE|SLAB_PANIC|
+	vm_area_cachep = kmem_cache_create("vm_area_struct",
+			sizeof(struct vm_area_struct), &args,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
 			SLAB_ACCOUNT);
+
 	mmap_init();
 	nsproxy_cache_init();
 }
diff --git a/mm/memory.c b/mm/memory.c
index d0197a0c0996..9c414c81f14a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6279,7 +6279,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 		goto inval;
 
 	/* Check if the VMA got isolated after we found it */
-	if (is_vma_detached(vma)) {
+	if (is_vma_detached(vma) || vma->vm_mm != mm) {
 		vma_end_read(vma);
 		count_vm_vma_lock_event(VMA_LOCK_MISS);
 		/* The area was replaced with another one */
-- 
2.47.0.277.g8800431eea-goog