[PATCH/RFC 6/8] numa - Migrate-on-Fault - add mbind() MPOL_MF_LAZY flag

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Migrate-on-fault - add MPOL_MF_LAZY ...

This patch adds another mbind() flag to request "lazy migration".
The flag, MPOL_MF_LAZY, modifies MPOL_MF_MOVE* such that the selected
pages are simply unmapped from the calling task's page table ['_MOVE]
or from all referencing page tables [_MOVE_ALL].  Anon pages will first
be added to the swap [or migration?] cache, if necessary.  The pages
will be migrated in the fault path on "first touch", if the policy
dictates at that time.

"Lazy Migration" will allow testing of migrate-on-fault via mbind().
Also allows applications to specify that only subsequently touched
pages be migrated to obey new policy, instead of all pages in range.
This can be useful for multi-threaded applications working on a
large shared data area that is initialized by an initial thread
resulting in all pages on one [or a few, if overflowed] nodes.
After unmap, the pages in regions assigned to the worker threads
will be automatically migrated local to the threads on 1st touch.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx>

 include/linux/mempolicy.h |   15 ++++++-
 include/linux/migrate.h   |    6 +++
 include/linux/rmap.h      |    6 ++-
 mm/mempolicy.c            |   19 ++++++---
 mm/migrate.c              |   92 +++++++++++++++++++++++++++++++++++++++++++++-
 mm/rmap.c                 |    7 ++-
 6 files changed, 129 insertions(+), 16 deletions(-)

Index: linux-2.6.36-mmotm-101103-1217/include/linux/mempolicy.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/mempolicy.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/mempolicy.h
@@ -47,9 +47,18 @@ enum mpol_rebind_step {
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
+#define MPOL_MF_MOVE	 (1<<1)	/* Move pages owned by this process to conform
+				   to policy */
+#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to policy */
+#define MPOL_MF_LAZY	 (1<<3)	/* Modifies '_MOVE:  lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4)	/* Internal flags start here */
+
+#ifndef CONFIG_MIGRATE_ON_FAULT
+#define MPOL_MF_VALID (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)
+#else
+#define MPOL_MF_VALID (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL \
+			| MPOL_MF_LAZY)
+#endif
 
 /*
  * Internal flags that share the struct mempolicy flags word with
Index: linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/mempolicy.c
+++ linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c
@@ -1220,8 +1220,7 @@ static long do_mbind(unsigned long start
 	int err;
 	LIST_HEAD(pagelist);
 
-	if (flags & ~(unsigned long)(MPOL_MF_STRICT |
-				     MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+  	if (flags & ~(unsigned long)MPOL_MF_VALID)
 		return -EINVAL;
 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
 		return -EPERM;
@@ -1280,20 +1279,26 @@ static long do_mbind(unsigned long start
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
-	err = PTR_ERR(vma);
-	if (!IS_ERR(vma)) {
-		int nr_failed = 0;
-
+	err = PTR_ERR(vma);	/* maybe ... */
+	if (!IS_ERR(vma))
 		err = mbind_range(mm, start, end, new, flags);
 
+	if (!err) {
+		int nr_failed = 0;
+
 		if (!list_empty(&pagelist)) {
+#ifdef CONFIG_MIGRATE_ON_FAULT
+			if (flags & MPOL_MF_LAZY)
+				nr_failed = migrate_pages_unmap_only(&pagelist);
+			else
+#endif
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
 						(unsigned long)vma, 0);
 			if (nr_failed)
 				putback_lru_pages(&pagelist);
 		}
 
-		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+		if (nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	} else
 		putback_lru_pages(&pagelist);
Index: linux-2.6.36-mmotm-101103-1217/include/linux/migrate.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/migrate.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/migrate.h
@@ -30,6 +30,8 @@ extern int migrate_huge_page_move_mappin
 				  struct page *newpage, struct page *page);
 
 #ifdef CONFIG_MIGRATE_ON_FAULT
+extern int migrate_pages_unmap_only(struct list_head *);
+
 extern struct page *check_migrate_misplaced_page(struct page *,
 			struct vm_area_struct *, unsigned long);
 extern struct page *migrate_misplaced_page(struct page *, struct mm_struct *,
@@ -75,4 +77,8 @@ static inline int migrate_huge_page_move
 #define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
+
+#ifndef CONFIG_MIGRATE_ON_FAULT
+#define migrate_pages_unmap_only(L) (0)
+#endif
 #endif /* _LINUX_MIGRATE_H */
Index: linux-2.6.36-mmotm-101103-1217/mm/migrate.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/migrate.c
+++ linux-2.6.36-mmotm-101103-1217/mm/migrate.c
@@ -756,7 +756,8 @@ static int unmap_and_move(new_page_t get
 	}
 
 	/* Establish migration ptes or remove ptes */
-	try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+	try_to_unmap(page,
+	             TTU_MIGRATE_DIRECT|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 skip_unmap:
 	if (!page_mapped(page))
@@ -894,6 +895,95 @@ out:
 	return rc;
 }
 
+#ifdef CONFIG_MIGRATE_ON_FAULT
+/*
+ * Lazy migration:  just unmap pages, moving anon pages to swap cache, if
+ * necessary.  Migration will occur, if policy dictates, when a task faults
+ * an unmapped page back into its page table--i.e., on "first touch" after
+ * unmapping.  Note that migrate-on-fault only migrates pages whose mapping
+ * [e.g., file system] supplies a migratepage op, so we skip pages that
+ * wouldn't migrate on fault.
+ *
+ * Pages are placed back on the lru whether or not they were successfully
+ * unmapped.  Like migrate_pages().
+ *
+ * Unline migrate_pages(), this function is only called in the context of
+ * a task that is unmapping it's own pages while holding its map semaphore
+ * for write.
+ */
+int migrate_pages_unmap_only(struct list_head *pagelist)
+{
+	struct page *page;
+	struct page *page2;
+	int nr_failed = 0;
+	int nr_unmapped = 0;
+
+	list_for_each_entry_safe(page, page2, pagelist, lru) {
+		int ret;
+
+		/*
+		 * TODO:  cond_resched() here like migrate_pages()?
+		 */
+
+		list_del(&page->lru);
+		/*
+		 * Give up easily.  We ARE being lazy.
+		 */
+		if (page_count(page) == 1 || !trylock_page(page))
+			goto done_with_page;
+
+		if (PageKsm(page) || PageWriteback(page))
+			goto unlock_page;
+
+		/*
+		 * see comments in unmap_and_move()
+		 */
+		if (!page->mapping)
+			goto unlock_page;
+
+		if (PageAnon(page)) {
+			if (!PageSwapCache(page) && !add_to_swap(page)) {
+				nr_failed++;
+				goto unlock_page;
+			}
+		} else {
+			struct address_space *mapping = page_mapping(page);
+			BUG_ON(!mapping);
+			if (!mapping->a_ops->migratepage)
+				goto unlock_page;
+		}
+
+		ret = try_to_unmap(page,
+	             TTU_MIGRATE_DEFERRED|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+		if (ret != SWAP_SUCCESS || page_mapped(page)) {
+			nr_failed++;
+			goto unlock_page;
+		}
+
+		nr_unmapped++;
+
+	unlock_page:
+		unlock_page(page);
+	done_with_page:
+		putback_lru_page(page);
+
+	}
+
+	/*
+	 * Drain local per cpu pagevecs so fault path can find the the pages
+	 * on the lru.  If we got migrated during the loop above, we may
+	 * have left pages cached on other cpus.  But, we'll live with that
+	 * here to avoid lru_add_drain_all().
+	 * TODO:  mechanism to drain on only those cpus we've been
+	 *        scheduled on between two points--e.g., during the loop.
+	 */
+	if (nr_unmapped)
+		lru_add_drain();
+
+	return nr_failed;
+}
+#endif /* _MIGRATE_ON_FAULT */
+
 /*
  * migrate_pages
  *
Index: linux-2.6.36-mmotm-101103-1217/include/linux/rmap.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/rmap.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/rmap.h
@@ -188,8 +188,9 @@ int page_referenced_one(struct page *, s
 
 enum ttu_flags {
 	TTU_UNMAP = 0,			/* unmap mode */
-	TTU_MIGRATION = 1,		/* migration mode */
-	TTU_MUNLOCK = 2,		/* munlock mode */
+	TTU_MIGRATE_DIRECT = 1,		/* direct migration mode */
+	TTU_MIGRATE_DEFERRED = 2,	/* deferred [lazy] migration mode */
+	TTU_MUNLOCK = 4,		/* munlock mode */
 	TTU_ACTION_MASK = 0xff,
 
 	TTU_IGNORE_MLOCK = (1 << 8),	/* ignore mlock */
@@ -197,6 +198,7 @@ enum ttu_flags {
 	TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
 };
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
+#define TTU_MIGRATION (TTU_MIGRATE_DIRECT | TTU_MIGRATE_DEFERRED)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
 int try_to_unmap_one(struct page *, struct vm_area_struct *,
Index: linux-2.6.36-mmotm-101103-1217/mm/rmap.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/rmap.c
+++ linux-2.6.36-mmotm-101103-1217/mm/rmap.c
@@ -1043,12 +1043,13 @@ int try_to_unmap_one(struct page *page,
 			 * pte. do_swap_page() will wait until the migration
 			 * pte is removed and then restart fault handling.
 			 */
-			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
+			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATE_DIRECT);
 			entry = make_migration_entry(page, pte_write(pteval));
 		}
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*pte));
-	} else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
+	} else if (PAGE_MIGRATION &&
+		         (TTU_ACTION(flags) == TTU_MIGRATE_DIRECT)) {
 		/* Establish migration entry for a file page */
 		swp_entry_t entry;
 		entry = make_migration_entry(page, pte_write(pteval));
@@ -1254,7 +1255,7 @@ static int try_to_unmap_anon(struct page
 		 * locking requirements of exec(), migration skips
 		 * temporary VMAs until after exec() completes.
 		 */
-		if (PAGE_MIGRATION && (flags & TTU_MIGRATION) &&
+		if (PAGE_MIGRATION && (flags & TTU_MIGRATE_DIRECT) &&
 				is_vma_temporary_stack(vma))
 			continue;
 
--
To unsubscribe from this list: send the line "unsubscribe linux-numa" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]     [Devices]

  Powered by Linux