[RFC PATCH 2/4] vrange: Introduce vrange_root to make vrange structures more flexible

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Instead of having the vrange trees hanging directly off of the
mm_struct, use a vrange_root structure, which will allow us
to have vrange_roots that hang off the mm_struct for anonymous
memory, as well as address_space structures for file-backed memory.

Cc: linux-mm@xxxxxxxxx
Cc: Michael Kerrisk <mtk.manpages@xxxxxxxxx>
Cc: Arun Sharma <asharma@xxxxxx>
Cc: Mel Gorman <mel@xxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Dave Hansen <dave@xxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Neil Brown <neilb@xxxxxxx>
Cc: Mike Hommey <mh@xxxxxxxxxxxx>
Cc: Taras Glek <tglek@xxxxxxxxxxx>
Cc: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Cc: Jason Evans <je@xxxxxx>
Cc: sanjay@xxxxxxxxxx
Cc: Paul Turner <pjt@xxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Michel Lespinasse <walken@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Signed-off-by: John Stultz <john.stultz@xxxxxxxxxx>
---
 fs/proc/task_mmu.c           |   10 +--
 include/linux/mm_types.h     |    4 +-
 include/linux/vrange.h       |   35 +++++-----
 include/linux/vrange_types.h |   21 ++++++
 kernel/fork.c                |    2 +-
 mm/vrange.c                  |  156 ++++++++++++++++++++++--------------------
 6 files changed, 126 insertions(+), 102 deletions(-)
 create mode 100644 include/linux/vrange_types.h

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index df009f0..11f63d4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -391,13 +391,13 @@ static void *v_start(struct seq_file *m, loff_t *pos)
 	if (!mm || IS_ERR(mm))
 		return mm;
 
-	vrange_lock(mm);
-	root = &mm->v_rb;
+	vrange_lock(&mm->vroot);
+	root = &mm->vroot.v_rb;
 
-	if (RB_EMPTY_ROOT(&mm->v_rb))
+	if (RB_EMPTY_ROOT(&mm->vroot.v_rb))
 		goto out;
 
-	next = rb_first(&mm->v_rb);
+	next = rb_first(&mm->vroot.v_rb);
 	range = vrange_entry(next);
 	while(n > 0 && range) {
 		n--;
@@ -432,7 +432,7 @@ static void v_stop(struct seq_file *m, void *v)
 	struct proc_vrange_private *priv = m->private;
 	if (priv->task) {
 		struct mm_struct *mm = priv->task->mm;
-		vrange_unlock(mm);
+		vrange_unlock(&mm->vroot);
 		mmput(mm);
 		put_task_struct(priv->task);
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 080bf74..2e02a6d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -14,6 +14,7 @@
 #include <linux/uprobes.h>
 #include <linux/page-flags-layout.h>
 #include <linux/mutex.h>
+#include <linux/vrange_types.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
@@ -353,8 +354,7 @@ struct mm_struct {
 
 
 #ifdef CONFIG_MMU
-	struct rb_root v_rb;		/* vrange rb tree */
-	struct mutex v_lock;		/* Protect v_rb */
+	struct vrange_root vroot;
 #endif
 	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
 	unsigned long hiwater_vm;	/* High-water virtual memory usage */
diff --git a/include/linux/vrange.h b/include/linux/vrange.h
index 4bcec40..b9b219c 100644
--- a/include/linux/vrange.h
+++ b/include/linux/vrange.h
@@ -1,42 +1,39 @@
 #ifndef _LINUX_VRANGE_H
 #define _LINUX_VRANGE_H
 
-#include <linux/mutex.h>
-#include <linux/interval_tree.h>
+#include <linux/vrange_types.h>
 #include <linux/mm.h>
 
-struct vrange {
-	struct interval_tree_node node;
-	bool purged;
-	struct mm_struct *mm;
-	struct list_head lru; /* protected by lru_lock */
-	atomic_t refcount;
-};
-
 #define vrange_entry(ptr) \
 	container_of(ptr, struct vrange, node.rb)
 
 #ifdef CONFIG_MMU
-struct mm_struct;
 
 static inline void mm_init_vrange(struct mm_struct *mm)
 {
-	mm->v_rb = RB_ROOT;
-	mutex_init(&mm->v_lock);
+	mm->vroot.v_rb = RB_ROOT;
+	mutex_init(&mm->vroot.v_lock);
+}
+
+static inline void vrange_lock(struct vrange_root *vroot)
+{
+	mutex_lock(&vroot->v_lock);
 }
 
-static inline void vrange_lock(struct mm_struct *mm)
+static inline void vrange_unlock(struct vrange_root *vroot)
 {
-	mutex_lock(&mm->v_lock);
+	mutex_unlock(&vroot->v_lock);
 }
 
-static inline void vrange_unlock(struct mm_struct *mm)
+static inline struct mm_struct *vrange_get_owner_mm(struct vrange *vrange)
 {
-	mutex_unlock(&mm->v_lock);
+
+	return container_of(vrange->owner, struct mm_struct, vroot);
 }
 
-extern void exit_vrange(struct mm_struct *mm);
+
 void vrange_init(void);
+extern void mm_exit_vrange(struct mm_struct *mm);
 int discard_vpage(struct page *page);
 bool vrange_address(struct mm_struct *mm, unsigned long start,
 			unsigned long end);
@@ -50,7 +47,7 @@ void lru_move_vrange_to_head(struct mm_struct *mm, unsigned long address);
 
 static inline void vrange_init(void) {};
 static inline void mm_init_vrange(struct mm_struct *mm) {};
-static inline void exit_vrange(struct mm_struct *mm);
+static inline void mm_exit_vrange(struct mm_struct *mm);
 
 static inline bool vrange_address(struct mm_struct *mm, unsigned long start,
 		unsigned long end) { return false; };
diff --git a/include/linux/vrange_types.h b/include/linux/vrange_types.h
new file mode 100644
index 0000000..bede336
--- /dev/null
+++ b/include/linux/vrange_types.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_VRANGE_TYPES_H
+#define _LINUX_VRANGE_TYPES_H
+
+#include <linux/mutex.h>
+#include <linux/interval_tree.h>
+
+struct vrange_root {
+	struct rb_root v_rb;		/* vrange rb tree */
+	struct mutex v_lock;		/* Protect v_rb */
+};
+
+
+struct vrange {
+	struct interval_tree_node node;
+	struct vrange_root *owner;
+	bool purged;
+	struct list_head lru; /* protected by lru_lock */
+	atomic_t refcount;
+};
+#endif
+
diff --git a/kernel/fork.c b/kernel/fork.c
index e3aa120..f2da4a0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -614,7 +614,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		uprobe_clear_state(mm);
-		exit_vrange(mm);
+		mm_exit_vrange(mm);
 		exit_aio(mm);
 		ksm_exit(mm);
 		khugepaged_exit(mm); /* must run before exit_mmap */
diff --git a/mm/vrange.c b/mm/vrange.c
index d07884d..9facbbc 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -39,10 +39,12 @@ void __init vrange_init(void)
 }
 
 static inline void __set_vrange(struct vrange *range,
-		unsigned long start_idx, unsigned long end_idx)
+		unsigned long start_idx, unsigned long end_idx,
+		bool purged)
 {
 	range->node.start = start_idx;
 	range->node.last = end_idx;
+	range->purged = purged;
 }
 
 static void lru_add_vrange(struct vrange *vrange)
@@ -63,12 +65,13 @@ static void lru_remove_vrange(struct vrange *vrange)
 
 void lru_move_vrange_to_head(struct mm_struct *mm, unsigned long address)
 {
-	struct rb_root *root = &mm->v_rb;
+	struct vrange_root *vroot = &mm->vroot;
 	struct interval_tree_node *node;
 	struct vrange *vrange;
 
-	vrange_lock(mm);
-	node = interval_tree_iter_first(root, address, address + PAGE_SIZE - 1);
+	vrange_lock(vroot);
+	node = interval_tree_iter_first(&vroot->v_rb, address,
+						address + PAGE_SIZE - 1);
 	if (node) {
 		vrange = container_of(node, struct vrange, node);
 		spin_lock(&lru_lock);
@@ -81,22 +84,21 @@ void lru_move_vrange_to_head(struct mm_struct *mm, unsigned long address)
 			list_move(&vrange->lru, &lru_vrange);
 		spin_unlock(&lru_lock);
 	}
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 }
 
-static void __add_range(struct vrange *range,
-			struct rb_root *root, struct mm_struct *mm)
+static void __add_range(struct vrange *range, struct vrange_root *vroot)
 {
-	range->mm = mm;
+	range->owner = vroot;
 	lru_add_vrange(range);
-	interval_tree_insert(&range->node, root);
+	interval_tree_insert(&range->node, &vroot->v_rb);
 }
 
 /* remove range from interval tree */
-static void __remove_range(struct vrange *range,
-				struct rb_root *root)
+static void __remove_range(struct vrange *range)
 {
-	interval_tree_remove(&range->node, root);
+	interval_tree_remove(&range->node, &range->owner->v_rb);
+	range->owner = NULL;
 }
 
 static struct vrange *alloc_vrange(void)
@@ -104,11 +106,13 @@ static struct vrange *alloc_vrange(void)
 	struct vrange *vrange = kmem_cache_alloc(vrange_cachep, GFP_KERNEL);
 	if (vrange)
 		atomic_set(&vrange->refcount, 1);
+	vrange->owner = NULL;
 	return vrange;
 }
 
 static void free_vrange(struct vrange *range)
 {
+	WARN_ON(range->owner);
 	lru_remove_vrange(range);
 	kmem_cache_free(vrange_cachep, range);
 }
@@ -120,20 +124,20 @@ static void put_vrange(struct vrange *range)
 		free_vrange(range);
 }
 
-static inline void range_resize(struct rb_root *root,
-		struct vrange *range,
-		unsigned long start, unsigned long end,
-		struct mm_struct *mm)
+static inline void range_resize(struct vrange *range,
+		unsigned long start, unsigned long end)
 {
-	__remove_range(range, root);
-	__set_vrange(range, start, end);
-	__add_range(range, root, mm);
+	struct vrange_root *vroot = range->owner;
+	bool purged = range->purged;
+
+	__remove_range(range);
+	__set_vrange(range, start, end, purged);
+	__add_range(range, vroot);
 }
 
-static int add_vrange(struct mm_struct *mm,
+static int add_vrange(struct vrange_root *vroot,
 			unsigned long start, unsigned long end)
 {
-	struct rb_root *root;
 	struct vrange *new_range, *range;
 	struct interval_tree_node *node, *next;
 	int purged = 0;
@@ -142,9 +146,8 @@ static int add_vrange(struct mm_struct *mm,
 	if (!new_range)
 		return -ENOMEM;
 
-	root = &mm->v_rb;
-	vrange_lock(mm);
-	node = interval_tree_iter_first(root, start, end);
+	vrange_lock(vroot);
+	node = interval_tree_iter_first(&vroot->v_rb, start, end);
 	while (node) {
 		next = interval_tree_iter_next(node, start, end);
 
@@ -158,24 +161,22 @@ static int add_vrange(struct mm_struct *mm,
 		end = max_t(unsigned long, end, node->last);
 
 		purged |= range->purged;
-		__remove_range(range, root);
+		__remove_range(range);
 		put_vrange(range);
 
 		node = next;
 	}
 
-	__set_vrange(new_range, start, end);
-	new_range->purged = purged;
-	__add_range(new_range, root, mm);
+	__set_vrange(new_range, start, end, purged);
+	__add_range(new_range, vroot);
 out:
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 	return 0;
 }
 
-static int remove_vrange(struct mm_struct *mm,
+static int remove_vrange(struct vrange_root *vroot,
 		unsigned long start, unsigned long end)
 {
-	struct rb_root *root;
 	struct vrange *new_range, *range;
 	struct interval_tree_node *node, *next;
 	int ret	= 0;
@@ -185,10 +186,9 @@ static int remove_vrange(struct mm_struct *mm,
 	if (!new_range)
 		return -ENOMEM;
 
-	root = &mm->v_rb;
-	vrange_lock(mm);
+	vrange_lock(vroot);
 
-	node = interval_tree_iter_first(root, start, end);
+	node = interval_tree_iter_first(&vroot->v_rb, start, end);
 	while (node) {
 		next = interval_tree_iter_next(node, start, end);
 
@@ -196,42 +196,40 @@ static int remove_vrange(struct mm_struct *mm,
 		ret |= range->purged;
 
 		if (start <= node->start && end >= node->last) {
-			__remove_range(range, root);
+			__remove_range(range);
 			put_vrange(range);
 		} else if (node->start >= start) {
-			range_resize(root, range, end, node->last, mm);
+			range_resize(range, end, node->last);
 		} else if (node->last <= end) {
-			range_resize(root, range, node->start, start, mm);
+			range_resize(range, node->start, start);
 		} else {
 			used_new = true;
-			__set_vrange(new_range, end, node->last);
-			new_range->purged = range->purged;
-			new_range->mm = mm;
-			range_resize(root, range, node->start, start, mm);
-			__add_range(new_range, root, mm);
+			__set_vrange(new_range, end, node->last, range->purged);
+			range_resize(range, node->start, start);
+			__add_range(new_range, vroot);
 			break;
 		}
 
 		node = next;
 	}
 
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 	if (!used_new)
 		put_vrange(new_range);
 
 	return ret;
 }
 
-void exit_vrange(struct mm_struct *mm)
+void mm_exit_vrange(struct mm_struct *mm)
 {
 	struct vrange *range;
 	struct rb_node *next;
 
-	next = rb_first(&mm->v_rb);
+	next = rb_first(&mm->vroot.v_rb);
 	while (next) {
 		range = vrange_entry(next);
 		next = rb_next(next);
-		__remove_range(range, &mm->v_rb);
+		__remove_range(range);
 		put_vrange(range);
 	}
 }
@@ -285,17 +283,18 @@ SYSCALL_DEFINE4(vrange, unsigned long, start,
 		goto out;
 
 	if (mode == VRANGE_VOLATILE)
-		ret = add_vrange(mm, start, end - 1);
+		ret = add_vrange(&mm->vroot, start, end - 1);
 	else if (mode == VRANGE_NOVOLATILE)
-		ret = remove_vrange(mm, start, end - 1);
+		ret = remove_vrange(&mm->vroot, start, end - 1);
 out:
 	return ret;
 }
 
+
 static bool __vrange_address(struct mm_struct *mm,
 			unsigned long start, unsigned long end)
 {
-	struct rb_root *root = &mm->v_rb;
+	struct rb_root *root = &mm->vroot.v_rb;
 	struct interval_tree_node *node;
 
 	node = interval_tree_iter_first(root, start, end);
@@ -306,10 +305,11 @@ bool vrange_address(struct mm_struct *mm,
 			unsigned long start, unsigned long end)
 {
 	bool ret;
+	struct vrange_root *vroot = &mm->vroot;
 
-	vrange_lock(mm);
+	vrange_lock(vroot);
 	ret = __vrange_address(mm, start, end);
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 	return ret;
 }
 
@@ -372,14 +372,13 @@ static inline pte_t *vpage_check_address(struct page *page,
 	return ptep;
 }
 
-static void __vrange_purge(struct mm_struct *mm,
+static void __vrange_purge(struct vrange_root *vroot,
 		unsigned long start, unsigned long end)
 {
-	struct rb_root *root = &mm->v_rb;
-	struct vrange *range;
 	struct interval_tree_node *node;
+	struct vrange *range;
 
-	node = interval_tree_iter_first(root, start, end);
+	node = interval_tree_iter_first(&vroot->v_rb, start, end);
 	while (node) {
 		range = container_of(node, struct vrange, node);
 		range->purged = true;
@@ -396,20 +395,19 @@ static int try_to_discard_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int ret = 0;
 	bool present;
+	struct vrange_root *vroot = &mm->vroot;
 
 	VM_BUG_ON(!PageLocked(page));
 
-	vrange_lock(mm);
+	vrange_lock(vroot);
 	pte = vpage_check_address(page, mm, address, &ptl);
 	if (!pte) {
-		vrange_unlock(mm);
 		goto out;
 	}
 
 	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
-		vrange_unlock(mm);
-		return 0;
+		goto out;
 	}
 
 	present = pte_present(*pte);
@@ -431,12 +429,13 @@ static int try_to_discard_one(struct page *page, struct vm_area_struct *vma,
 	}
 
 	set_pte_at(mm, address, pte, pteval);
-	__vrange_purge(mm, address, address + PAGE_SIZE -1);
+	__vrange_purge(&mm->vroot, address, address + PAGE_SIZE - 1);
 	pte_unmap_unlock(pte, ptl);
 	mmu_notifier_invalidate_page(mm, address);
-	vrange_unlock(mm);
 	ret = 1;
+
 out:
+	vrange_unlock(vroot);
 	return ret;
 }
 
@@ -458,12 +457,14 @@ static int try_to_discard_vpage(struct page *page)
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
 		pte_t *pte;
 		spinlock_t *ptl;
+		struct vrange_root *vroot;
 
 		vma = avc->vma;
 		mm = vma->vm_mm;
+		vroot = &mm->vroot;
 		address = vma_address(page, vma);
 
-		vrange_lock(mm);
+		vrange_lock(vroot);
 		/*
 		 * We can't use page_check_address because it doesn't check
 		 * swap entry of the page table. We need the check because
@@ -473,24 +474,24 @@ static int try_to_discard_vpage(struct page *page)
 		 */
 		pte = vpage_check_address(page, mm, address, &ptl);
 		if (!pte) {
-			vrange_unlock(mm);
+			vrange_unlock(vroot);
 			continue;
 		}
 
 		if (vma->vm_flags & VM_LOCKED) {
 			pte_unmap_unlock(pte, ptl);
-			vrange_unlock(mm);
+			vrange_unlock(vroot);
 			goto out;
 		}
 
 		pte_unmap_unlock(pte, ptl);
 		if (!__vrange_address(mm, address,
 					address + PAGE_SIZE - 1)) {
-			vrange_unlock(mm);
+			vrange_unlock(vroot);
 			goto out;
 		}
 
-		vrange_unlock(mm);
+		vrange_unlock(vroot);
 	}
 
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
@@ -531,19 +532,20 @@ int discard_vpage(struct page *page)
 
 bool is_purged_vrange(struct mm_struct *mm, unsigned long address)
 {
-	struct rb_root *root = &mm->v_rb;
+	struct vrange_root *vroot = &mm->vroot;
 	struct interval_tree_node *node;
 	struct vrange *range;
 	bool ret = false;
 
-	vrange_lock(mm);
-	node = interval_tree_iter_first(root, address, address + PAGE_SIZE - 1);
+	vrange_lock(vroot);
+	node = interval_tree_iter_first(&vroot->v_rb, address,
+						address + PAGE_SIZE - 1);
 	if (node) {
 		range = container_of(node, struct vrange, node);
 		if (range->purged)
 			ret = true;
 	}
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 	return ret;
 }
 
@@ -631,12 +633,14 @@ static unsigned int discard_vma_pages(struct zone *zone, struct mm_struct *mm,
 unsigned int discard_vrange(struct zone *zone, struct vrange *vrange,
 				int nr_to_discard)
 {
-	struct mm_struct *mm = vrange->mm;
+	struct mm_struct *mm;
 	unsigned long start = vrange->node.start;
 	unsigned long end = vrange->node.last;
 	struct vm_area_struct *vma;
 	unsigned int nr_discarded = 0;
 
+	mm = vrange_get_owner_mm(vrange);
+
 	if (!down_read_trylock(&mm->mmap_sem))
 		goto out;
 
@@ -678,7 +682,7 @@ static struct vrange *get_victim_vrange(void)
 	spin_lock(&lru_lock);
 	list_for_each_prev_safe(cur, tmp, &lru_vrange) {
 		vrange = list_entry(cur, struct vrange, lru);
-		mm = vrange->mm;
+		mm = vrange_get_owner_mm(vrange);
 		/* the process is exiting so pass it */
 		if (atomic_read(&mm->mm_users) == 0) {
 			list_del_init(&vrange->lru);
@@ -698,7 +702,7 @@ static struct vrange *get_victim_vrange(void)
 		 * need to get a refcount of mm.
 		 * NOTE: We guarantee mm_count isn't zero in here because
 		 * if we found vrange from LRU list, it means we are
-		 * before exit_vrange or remove_vrange.
+		 * before mm_exit_vrange or remove_vrange.
 		 */
 		atomic_inc(&mm->mm_count);
 
@@ -713,8 +717,10 @@ static struct vrange *get_victim_vrange(void)
 
 static void put_victim_range(struct vrange *vrange)
 {
+	struct mm_struct *mm = vrange_get_owner_mm(vrange);
+
 	put_vrange(vrange);
-	mmdrop(vrange->mm);
+	mmdrop(mm);
 }
 
 unsigned int discard_vrange_pages(struct zone *zone, int nr_to_discard)
@@ -724,7 +730,7 @@ unsigned int discard_vrange_pages(struct zone *zone, int nr_to_discard)
 
 	start_vrange = vrange = get_victim_vrange();
 	if (start_vrange) {
-		struct mm_struct *mm = start_vrange->mm;
+		struct mm_struct *mm = vrange_get_owner_mm(vrange);
 		atomic_inc(&start_vrange->refcount);
 		atomic_inc(&mm->mm_count);
 	}
-- 
1.7.10.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]