[PATCH/RFC 8/11] numa - Automatic-migration - per cpuset max mapcount control

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



AutoPage Migration - add max mapcount migration threshold

This patch adds an additional per cpuset migration control that
allows one to vary the page mapcount threshold above which pages
will not be migrated by MPOL_MF_MOVE.  The default value is 1,
which yields the same behavior as before this patch.

This is useful because anon pages can be shared between ancestors
and descendants until sharing is broken by a write.  We want to
be able to unmap these pages for lazy automigration so that the
next touch will migrate the page local to the task that touches
it.  However, we still want a threshold above which we don't
attempt to migrate the page because unmap is very expensive when
a page has a large mapcount.

We add the threshold to the task structure so that we can fetch
it using a static inline function that is redefined to return
the default value of 1 when AUTO_MIGRATION is not configured.
The max mapcount is accessed for each page proposed for migration
and we don't want to call a cpuset function and take an
rcu_read_lock()/rcu_read_unlock() round trip for each page.

Note:  This threshold could be configured under MIGRATE_ON_FAULT
instead of AUTO_MIGRATION or independently of either, as it is
useful for mbind() with MPOL_MF_MOVE as well.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx>

 include/linux/auto-migrate.h |    4 ++++
 include/linux/sched.h        |    1 +
 kernel/cpuset.c              |   42 +++++++++++++++++++++++++++++++++++++++++-
 mm/mempolicy.c               |    8 +++++---
 4 files changed, 51 insertions(+), 4 deletions(-)

Index: linux-2.6.36-mmotm-101103-1217/include/linux/auto-migrate.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/auto-migrate.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/auto-migrate.h
@@ -77,6 +77,10 @@ static inline void check_migrate_pending
 }
 #endif /* SCHED_AUTO_MIGRATION */
 
+static inline unsigned int migrate_max_mapcount(struct task_struct *task)
+{
+	return task->migrate_max_mapcount;
+}
 #else	/* !CONFIG_AUTO_MIGRATION */
 
 static inline int is_auto_migration(int flags)
Index: linux-2.6.36-mmotm-101103-1217/kernel/cpuset.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/kernel/cpuset.c
+++ linux-2.6.36-mmotm-101103-1217/kernel/cpuset.c
@@ -101,6 +101,7 @@ struct cpuset {
 	struct fmeter fmeter;		/* memory_pressure filter */
 
 	unsigned long auto_migrate_interval;
+	unsigned int migrate_max_mapcount;
 
 	/* partition number for rebuild_sched_domains() */
 	int pn;
@@ -200,6 +201,7 @@ static inline int is_auto_migrate(const
 static struct cpuset top_cpuset = {
 	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
 	.auto_migrate_interval = AUTO_MIGRATE_INTERVAL_DFLT,
+	.migrate_max_mapcount = 1,
 };
 
 /*
@@ -365,8 +367,11 @@ static void cpuset_update_task_cpuset_fl
 	if (is_auto_migrate(cs)) {
 		set_auto_migrate_enabled(tsk, 1);
 		tsk->auto_migrate_interval = cs->auto_migrate_interval;
-	} else
+		tsk->migrate_max_mapcount  = cs->migrate_max_mapcount;
+	} else {
 		set_auto_migrate_enabled(tsk, 0);
+		tsk->migrate_max_mapcount  = 1;
+	}
 
 }
 
@@ -1553,6 +1558,23 @@ static int update_auto_migrate_interval(
 	return 0;
 }
 
+/*
+ * Call with manage_mutex held.
+ */
+static int update_migrate_max_mapcount(struct cpuset *cs, u64 val)
+{
+	unsigned int n = val;
+
+	if (n == cs->migrate_max_mapcount)
+		return 0;
+
+	if (n < 1)
+		cs->migrate_max_mapcount = 1;
+	else
+		cs->migrate_max_mapcount = n;
+	return 0;
+}
+
 /* The various types of files and directories in a cpuset file system */
 
 typedef enum {
@@ -1573,6 +1595,7 @@ typedef enum {
 	FILE_MIGRATE_ON_FAULT,
 	FILE_AUTO_MIGRATE,
 	FILE_AUTO_MIGRATE_INTERVAL,
+	FILE_MIGRATE_MAX_MAPCOUNT,
 } cpuset_filetype_t;
 
 static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
@@ -1627,6 +1650,9 @@ static int cpuset_write_u64(struct cgrou
 	case FILE_AUTO_MIGRATE_INTERVAL:
 		retval = update_auto_migrate_interval(cs, val);
 		break;
+	case FILE_MIGRATE_MAX_MAPCOUNT:
+		retval = update_migrate_max_mapcount(cs, val);
+		break;
 	default:
 		retval = -EINVAL;
 		break;
@@ -1759,6 +1785,9 @@ static ssize_t cpuset_common_file_read(s
 	case FILE_AUTO_MIGRATE_INTERVAL:
 		s += sprintf(s, "%ld", cs->auto_migrate_interval / HZ);
 		break;
+	case FILE_MIGRATE_MAX_MAPCOUNT:
+		s += sprintf(s, "%d", cs->migrate_max_mapcount);
+		break;
 	default:
 		retval = -EINVAL;
 		goto out;
@@ -1954,6 +1983,13 @@ static struct cftype cft_auto_migrate_in
 	.private = FILE_AUTO_MIGRATE_INTERVAL,
 };
 
+static struct cftype cft_migrate_max_mapcount = {
+	.name = "migrate_max_mapcount",
+	.read = cpuset_common_file_read,
+	.write_u64 = cpuset_write_u64,
+	.private = FILE_MIGRATE_MAX_MAPCOUNT,
+};
+
 static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	int err;
@@ -1980,6 +2016,9 @@ static int cpuset_populate(struct cgroup
 	err = add_auto_migration_file(cont, ss, &cft_auto_migrate_interval);
 	if (err < 0)
 		return err;
+	err = add_auto_migration_file(cont, ss, &cft_migrate_max_mapcount);
+	if (err < 0)
+		return err;
 	/* memory_pressure_enabled is in root cpuset only */
 	if (!cont->parent)
 		err = cgroup_add_file(cont, ss,
@@ -2064,6 +2103,7 @@ static struct cgroup_subsys_state *cpuse
 		set_bit(CS_AUTO_MIGRATE, &cs->flags);
 	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	cs->auto_migrate_interval = parent->auto_migrate_interval;
+	cs->migrate_max_mapcount  = parent->migrate_max_mapcount;
 	cpumask_clear(cs->cpus_allowed);
 	nodes_clear(cs->mems_allowed);
 	fmeter_init(&cs->fmeter);
Index: linux-2.6.36-mmotm-101103-1217/include/linux/sched.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/sched.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/sched.h
@@ -1464,6 +1464,7 @@ struct task_struct {
 #endif
 	unsigned long next_migrate;	/* internode migration hysteresis */
 	unsigned long auto_migrate_interval;	/* from cpuset */
+	unsigned int migrate_max_mapcount;	/* for !MPOL_MF_MOVE_ALL */
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
Index: linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/mempolicy.c
+++ linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c
@@ -1051,11 +1051,13 @@ static void migrate_page_add(struct page
 				unsigned long flags)
 {
 	/*
-	 * Avoid migrating a file backed page in a private mapping or
-	 * a page that is shared with others.
+	 * Avoid migrating a file backed page in a private mapping, or
+	 * a page that is shared with > 'migrate_max_mapcount' others
+	 * unless MPOL_MF_MOVE_ALL specified.
 	 */
 	if ((!(flags & MPOL_MF_MOVE_ANON_ONLY) || PageAnon(page)) &&
-		((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)) {
+		((flags & MPOL_MF_MOVE_ALL) ||
+			page_mapcount(page) <= migrate_max_mapcount(current))) {
 		if (!isolate_lru_page(page)) {
 			list_add_tail(&page->lru, pagelist);
 			inc_zone_page_state(page, NR_ISOLATED_ANON +
--
To unsubscribe from this list: send the line "unsubscribe linux-numa" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]     [Devices]

  Powered by Linux