Re: [PATCH v3] mm: simplify find_vma_prev

KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> · Mon, 12 Dec 2011 18:27:11 +0900

On Mon, 12 Dec 2011 09:49:30 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> wrote:

> On Fri,  9 Dec 2011 17:48:40 -0500
> kosaki.motohiro@xxxxxxxxx wrote:
> 
> > From: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
> > 
> > commit 297c5eee37 (mm: make the vma list be doubly linked) added
> > vm_prev member into vm_area_struct. Therefore we can simplify
> > find_vma_prev() by using it. Also, this change help to improve
> > page fault performance because it has strong locality of reference.
> > 
> > Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
> 
> Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> 

Hmm, your work remind me of a patch I tried in past.
Here is a refleshed one...how do you think ?

==
>From c0261936fc01322d06425731d33f38b2021e8067 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Date: Mon, 12 Dec 2011 18:31:19 +0900
Subject: [PATCH] per thread vma cache.

This is a toy patch. How do you think ?

This is a patch for per-thread mmap_cache without heavy atomic ops.

I'm sure overhead of find_vma() is pretty small in usual application
and this will not show good improvement. But I think, if we need
to have cache of vma, it should be per thread rather than per mm.

This patch adds thread->mmap_cache, a pointer for vm_area_struct
and update it appropriately. Because we have no refcnt on vm_area_struct,
thread->mmap_cache may be a stale pointer. This patch detects stale
pointer by checking

    - thread->mmap_cache is one of SLABs in vm_area_cachep.
    - thread->mmap_cache->vm_mm == mm.

vma->vm_mm will be cleared before kmem_cache_free() by this patch.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

Note:
   Kosaki's work will merge find_vma_prev() and find_vma().
   Then, we'll cover most of case just by modifying find_vma().
---
 include/linux/mm_types.h |    2 +
 include/linux/sched.h    |    1 +
 include/linux/slab_def.h |   13 ++++++++++
 include/linux/slub_def.h |   12 +++++++++
 init/Kconfig             |    5 ++++
 kernel/fork.c            |    3 +-
 mm/mmap.c                |   61 +++++++++++++++++++++++++++++++++++++++-------
 mm/nommu.c               |    4 +-
 8 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 81a56df..8a9be1a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -255,6 +255,8 @@ struct vm_area_struct {
 #endif
 };
 
+extern void free_vma(struct vm_area_struct *vma);
+
 struct core_thread {
 	struct task_struct *task;
 	struct core_thread *next;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbb5d3e..a161c2b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1294,6 +1294,7 @@ struct task_struct {
 #endif
 
 	struct mm_struct *mm, *active_mm;
+	struct vm_area_struct *mmap_cache;
 #ifdef CONFIG_COMPAT_BRK
 	unsigned brk_randomized:1;
 #endif
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index d00e0ba..763c1d9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -214,4 +214,17 @@ found:
 
 #endif	/* CONFIG_NUMA */
 
+/*
+ * Check the object is under specified kmem_cache.
+ */
+static inline bool is_kmem_cache(void *data, struct kmem_cache *s)
+{
+	struct page *page;
+
+	page = virt_to_head_page(data);
+	if (PageSlab(page) && page->lru.prev == s)
+		return true;
+	return false;
+}
+
 #endif	/* _LINUX_SLAB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a32bcfd..9eba7e7 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
+#include <linux/mm.h>
 
 #include <linux/kmemleak.h>
 
@@ -313,4 +314,15 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 }
 #endif
 
+/*
+ * Check the object is under specified kmem cache.
+ */
+static inline bool is_kmem_cache(void *data, struct kmem_cache *s)
+{
+	struct page *page = virt_to_head_page(data);
+
+	if (PageSlab(page) && page->slab == s)
+		return true;
+	return false;
+}
 #endif /* _LINUX_SLUB_DEF_H */
diff --git a/init/Kconfig b/init/Kconfig
index 6dfc8c3..7fcfffd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1266,6 +1266,11 @@ config SLOB
 
 endchoice
 
+config PER_THREAD_MMAP_CACHE
+	bool
+	default y
+	depends on SLAB || SLUB
+
 config MMAP_ALLOW_UNINITIALIZED
 	bool "Allow mmapped anonymous memory to be uninitialized"
 	depends on EXPERT && !MMU
diff --git a/kernel/fork.c b/kernel/fork.c
index e20518d..18d73c2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -432,7 +432,7 @@ out:
 fail_nomem_anon_vma_fork:
 	mpol_put(pol);
 fail_nomem_policy:
-	kmem_cache_free(vm_area_cachep, tmp);
+	free_vma(tmp);
 fail_nomem:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
@@ -825,6 +825,7 @@ good_mm:
 
 	tsk->mm = mm;
 	tsk->active_mm = mm;
+	tsk->mmap_cache = NULL;
 	return 0;
 
 fail_nomem:
diff --git a/mm/mmap.c b/mm/mmap.c
index 83813fa..7b86e05 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -238,7 +238,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 			removed_exe_file_vma(vma->vm_mm);
 	}
 	mpol_put(vma_policy(vma));
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(vma);
 	return next;
 }
 
@@ -478,8 +478,11 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (next)
 		next->vm_prev = prev;
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
-	if (mm->mmap_cache == vma)
+	if (mm->mmap_cache == vma) {
 		mm->mmap_cache = prev;
+		if (current->mm == mm)
+			current->mmap_cache = prev;
+	}
 }
 
 /*
@@ -642,7 +645,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
-		kmem_cache_free(vm_area_cachep, next);
+		free_vma(next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
 		 * we must remove another next too. It would clutter
@@ -1364,7 +1367,7 @@ unmap_and_free_vma:
 	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
 	charged = 0;
 free_vma:
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(vma);
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
@@ -1588,10 +1591,42 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
 EXPORT_SYMBOL(get_unmapped_area);
 
+#ifdef CONFIG_PER_THREAD_MMAP_CACHE
+static struct vm_area_struct *thread_mmap_cache(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma = current->mmap_cache;
+
+	if (!vma || current->mm != mm)
+		return NULL;
+
+	if ((vma->vm_mm != mm) || !is_kmem_cache(vma, vm_area_cachep))
+		return NULL;
+
+	return vma;
+}
+
+static void set_thread_mmap_cache(struct mm_struct *mm,
+		struct vm_area_struct *vma)
+{
+	if (current->mm == mm)
+		current->mmap_cache = vma;
+}
+#else
+static struct vm_area_struct *thread_mmap_cache(struct mm_struct *mm)
+{
+	return NULL;
+}
+
+static void set_thread_mmap_cache(struct mm_struct *mm,
+		struct vm_area_struct *vma)
+{
+}
+#endif
+
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-	struct vm_area_struct *vma = NULL;
+	struct vm_area_struct *vma = thread_mmap_cache(mm);
 
 	if (mm) {
 		/* Check the cache first. */
@@ -1617,8 +1652,10 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 				} else
 					rb_node = rb_node->rb_right;
 			}
-			if (vma)
+			if (vma) {
 				mm->mmap_cache = vma;
+				set_thread_mmap_cache(mm, vma);
+			}
 		}
 	}
 	return vma;
@@ -2017,7 +2054,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
  out_free_mpol:
 	mpol_put(pol);
  out_free_vma:
-	kmem_cache_free(vm_area_cachep, new);
+	free_vma(new);
  out_err:
 	return err;
 }
@@ -2400,7 +2437,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
  out_free_mempol:
 	mpol_put(pol);
  out_free_vma:
-	kmem_cache_free(vm_area_cachep, new_vma);
+	free_vma(new_vma);
 	return NULL;
 }
 
@@ -2506,7 +2543,7 @@ int install_special_mapping(struct mm_struct *mm,
 	return 0;
 
 out:
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(vma);
 	return ret;
 }
 
@@ -2675,6 +2712,12 @@ void mm_drop_all_locks(struct mm_struct *mm)
 	mutex_unlock(&mm_all_locks_mutex);
 }
 
+void free_vma(struct vm_area_struct *vma)
+{
+	vma->vm_mm = NULL;
+	kmem_cache_free(vm_area_cachep, vma);
+}
+
 /*
  * initialise the VMA slab
  */
diff --git a/mm/nommu.c b/mm/nommu.c
index b982290..3c98fd5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -793,7 +793,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 			removed_exe_file_vma(mm);
 	}
 	put_nommu_region(vma->vm_region);
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(vma);
 }
 
 /*
@@ -1443,7 +1443,7 @@ error:
 		fput(vma->vm_file);
 	if (vma->vm_flags & VM_EXECUTABLE)
 		removed_exe_file_vma(vma->vm_mm);
-	kmem_cache_free(vm_area_cachep, vma);
+	free_vma(vma);
 	kleave(" = %d", ret);
 	return ret;
 
-- 
1.7.4.1


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>