[PATCH/RFC 3/5] numa - migration cache - add move_to_swap

Migration Cache 3/5 - move page from migration cache to swap cache

This patch modifies the swapfile.c "unuse_*" stack to support
moving pages from the migration cache to the swap cache when we
must "fall back to swap".  This also allows vmscan.c:shrink_page_list()
to move migration cache pages to the swap cache when/if it wants to
swap them out; the resulting check is sketched below.
shrink_page_list() should only find anon pages in the migration
cache when do_mbind() is called with MPOL_MF_MOVE|MPOL_MF_LAZY or
when lazy automigration is enabled.
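
With the move available, the check that vmscan makes reduces to
roughly the following (a sketch of the check_add_to_swap() hunk
below; the fall-through to the existing add-to-swap path is not
changed by this patch):

	if (PageSwapCache(page)) {
		if (page_in_migration_cache(page))
			/* try the move; swap out only if it succeeded */
			return migration_move_to_swap(page);
		return 1;	/* already in swap cache proper */
	}
	/* else: existing "try to add @page to swap cache" path */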

Because of the new usage, the patch renames the static "unuse_*"
functions in swapfile.c to "update_*".  In update_pte_range(), if the
entry argument matches the page's private data, we perform the usual
unuse_pte(); otherwise this is an "update/move" operation and we call
update_pte() instead, as sketched below.

The patch then implements the __migration_move_to_swap() function
on top of the modified "update_*" stack; the resulting call chain
is sketched below.
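
For reference, the call chain when vmscan falls back to swap (only
functions added or renamed by this series are shown):

	shrink_page_list()				[mm/vmscan.c]
	  check_add_to_swap()				[include/linux/swap.h]
	    migration_move_to_swap(page)		[mm/rmap.c]
	      __add_to_swap(page, 0)			/* new swap cache entry */
	      for each vma on the anon_vma chain:
	        __migration_move_to_swap(vma, page, entry)	[mm/swapfile.c]
	          update_vma() -> update_pud_range() ->
	            update_pmd_range() -> update_pte_range() -> update_pte()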

Assumption:  because this facility is used only for removing swap
devices [sys_swapoff()] and for swapping out migration-cached pages,
it is not on a critical/fast path.

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@xxxxxx>

 include/linux/rmap.h |    4 ++
 include/linux/swap.h |    7 +++-
 mm/rmap.c            |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/swapfile.c        |   76 +++++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 154 insertions(+), 11 deletions(-)

Index: linux-2.6.36-mmotm-101103-1217/include/linux/swap.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/swap.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/swap.h
@@ -8,6 +8,7 @@
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
 #include <linux/node.h>
+#include <linux/rmap.h>
 
 #include <asm/atomic.h>
 #include <asm/page.h>
@@ -405,6 +406,8 @@ extern int  migration_add_reference_page
 extern int migration_ref_count(swp_entry_t);
 extern void __migration_remove_reference(struct page *, swp_entry_t);
 extern struct page *lookup_migration_cache(swp_entry_t);
+extern int __migration_move_to_swap(struct vm_area_struct *, struct page *,
+		swp_entry_t);
 
 #ifdef PAGE_FLAGS_PRIVATE	/* only where this is defined */
 /**
@@ -412,7 +415,7 @@ extern struct page *lookup_migration_cac
  * @page:  page to check/add
  *
  * For vmscan:shrink_page_list():
- * if @page in migration cache, fail -- until "move to swap" available.
+ * if @page in migration cache, try to move to swap cache.
  * if @page already in swap cache -- OK to swap out.
  * else try to add @page to swap cache
  */
@@ -420,7 +423,7 @@ static inline int check_add_to_swap(stru
 {
 	if (PageSwapCache(page)) {
 		if (page_in_migration_cache(page))
-			return 0;	/* Fail -- TODO:  move to swap */
+			return migration_move_to_swap(page);
 		else
 			return 1;	/* already in swap cache */
 	}
Index: linux-2.6.36-mmotm-101103-1217/mm/swapfile.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/swapfile.c
+++ linux-2.6.36-mmotm-101103-1217/mm/swapfile.c
@@ -896,6 +896,41 @@ unsigned int count_swap_pages(int type,
 #endif /* CONFIG_HIBERNATION */
 
 /*
+ * replace [migration cache] pte in pmd @ addr with swap pte built from
+ * swp_entry_t value in page's private data.  Free [decrement ref count]
+ * previous [migration cache] entry
+ */
+static int update_pte(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long addr, swp_entry_t entry, struct page *page)
+{
+#ifdef CONFIG_MIGRATION
+	spinlock_t *ptl;
+	pte_t *pte;
+	int ret = 0;
+
+	BUG_ON(!is_migration_cache(entry));
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (likely(pte_same(*pte, swp_entry_to_pte(entry)))) {
+		swp_entry_t new_entry;
+		pte_t new_pte;
+
+		new_entry.val = page_private(page);
+		new_pte       = swp_entry_to_pte(new_entry);
+		set_pte_at(vma->vm_mm, addr, pte, new_pte);
+
+		__migration_remove_reference(NULL, entry);
+		ret = 1;	/* updated -- terminate outer loops */
+	}
+	pte_unmap_unlock(pte, ptl);
+
+	return ret;
+#else
+	BUG();	/* shouldn't get here */
+#endif
+}
+
+/*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
  * force COW, vm_page_prot omits write permission from any private vma.
@@ -940,7 +975,14 @@ out_nolock:
 	return ret;
 }
 
-static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+/*
+ * @entry contains pte to replace in *pmd
+ * if @entry == page_private(page), "unuse" the swap pte--i.e.,
+ *	replace it with a real anon page pte
+ * else replace the pte with the swap entry in page_private(@page)
+ *	[for moving migration cache pages to swap cache]
+ */
+static int update_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				swp_entry_t entry, struct page *page)
 {
@@ -965,7 +1007,10 @@ static int unuse_pte_range(struct vm_are
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
 			pte_unmap(pte);
-			ret = unuse_pte(vma, pmd, addr, entry, page);
+			if (entry.val == page_private(page))
+				ret = unuse_pte(vma, pmd, addr, entry, page);
+			else
+				ret = update_pte(vma, pmd, addr, entry, page);
 			if (ret)
 				goto out;
 			pte = pte_offset_map(pmd, addr);
@@ -976,7 +1021,7 @@ out:
 	return ret;
 }
 
-static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static inline int update_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
 				swp_entry_t entry, struct page *page)
 {
@@ -989,14 +1034,14 @@ static inline int unuse_pmd_range(struct
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
+		ret = update_pte_range(vma, pmd, addr, next, entry, page);
 		if (ret)
 			return ret;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int update_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
 				swp_entry_t entry, struct page *page)
 {
@@ -1009,14 +1054,14 @@ static inline int unuse_pud_range(struct
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
+		ret = update_pmd_range(vma, pud, addr, next, entry, page);
 		if (ret)
 			return ret;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
-static int unuse_vma(struct vm_area_struct *vma,
+static int update_vma(struct vm_area_struct *vma,
 				swp_entry_t entry, struct page *page)
 {
 	pgd_t *pgd;
@@ -1039,7 +1084,7 @@ static int unuse_vma(struct vm_area_stru
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
+		ret = update_pud_range(vma, pgd, addr, next, entry, page);
 		if (ret)
 			return ret;
 	} while (pgd++, addr = next, addr != end);
@@ -1063,13 +1108,26 @@ static int unuse_mm(struct mm_struct *mm
 		lock_page(page);
 	}
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
+		if (vma->anon_vma && (ret = update_vma(vma, entry, page)))
 			break;
 	}
 	up_read(&mm->mmap_sem);
 	return (ret < 0)? ret: 0;
 }
 
+#ifdef CONFIG_MIGRATION_CACHE
+/*
+ * replace migration cache pte for page with swap pte built
+ * from page_private(page).
+ */
+int __migration_move_to_swap(struct vm_area_struct *vma,
+		struct page *page, swp_entry_t entry)
+{
+	return update_vma(vma, entry, page);
+
+}
+#endif
+
 /*
  * Scan swap_map from current position to next entry still in use.
  * Recycle to start on reaching the end, returning 0 when empty.
Index: linux-2.6.36-mmotm-101103-1217/include/linux/rmap.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/rmap.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/rmap.h
@@ -204,6 +204,10 @@ int try_to_unmap(struct page *, enum ttu
 int try_to_unmap_one(struct page *, struct vm_area_struct *,
 			unsigned long address, enum ttu_flags flags);
 
+#ifdef CONFIG_MIGRATION_CACHE
+int migration_move_to_swap(struct page *);
+#endif
+
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
  */
Index: linux-2.6.36-mmotm-101103-1217/mm/rmap.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/rmap.c
+++ linux-2.6.36-mmotm-101103-1217/mm/rmap.c
@@ -355,6 +355,84 @@ void page_unlock_anon_vma(struct anon_vm
 	rcu_read_unlock();
 }
 
+#ifdef CONFIG_MIGRATION_CACHE
+/*
+ * Move a page in the migration cache to the swap cache when
+ * vmscan finds anon page swap candidate in migration cache.
+ * Return !0 on success; 0 otherwise
+ *
+ * Must hold page lock.
+ */
+int migration_move_to_swap(struct page *page)
+{
+	struct anon_vma *anon_vma;
+	struct anon_vma_chain *avc;
+	swp_entry_t entry;
+	int moved = 0;
+	int ret = 0;
+
+	BUG_ON(!PageLocked(page));
+	BUG_ON(!page_in_migration_cache(page));
+
+	/*
+	 * Optimistically add page to swap cache
+	 */
+	entry.val = page_private(page);	/* save for move */
+	set_page_private(page, 0);	/* prepare for __add_to_swap() */
+	ClearPageSwapCache(page);
+	if (!__add_to_swap(page, 0))
+		goto out;
+
+	anon_vma = page_lock_anon_vma(page);
+	if (!anon_vma) {
+		delete_from_swap_cache(page);	/* back out */
+		goto out; /* nothing to move */
+	}
+
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
+		if (!__migration_move_to_swap(vma, page, entry)) {
+			page_unlock_anon_vma(anon_vma);
+			/*
+			 * If we've moved any pages, we're left with page
+			 * partially in migration cache, partially in swap
+			 * cache.  Can't be good!
+			 */
+			if (moved) {
+				printk(KERN_ERR
+					"%s failed after moving %d entries\n",
+					__func__, moved);
+				BUG();
+			}
+			goto out;
+		}
+		moved++;
+	}
+
+	page_unlock_anon_vma(anon_vma);
+
+	/*
+	 * __add_to_swap() added another ref to page for swap cache.
+	 * __migration_move_to_swap() did NOT remove the migration
+	 * cache's ref on the page, so drop it here, after replacing
+	 * all migration ptes.
+	 */
+	page_cache_release(page);
+	ret = 1;
+
+out:
+	if (!ret) {
+		/*
+		 * restore migration cache entry on error.
+		 */
+		set_page_private(page, entry.val);
+		SetPageSwapCache(page);
+	}
+	return ret;
+}
+#endif /* CONFIG_MIGRATION_CACHE */
+
+
 /*
  * At what user virtual address is page expected in @vma?
  * Returns virtual address or -EFAULT if page's index/offset is not