AutoPage Migration - hook automigration to migrate-on-fault Add a per-cpuset control--auto_migrate_lazy--to use migrate-on-fault for auto-migration, if configured. Modify migrate_to_node() to just unmap the eligible pages via migrate_pages_unmap_only() when the MPOL_MF_LAZY flag is set. Set auto_migrate_lazy by default in the top cpuset. Lazy automigration is preferred. Why? Think of the effect of direct auto-migration on a multithreaded process: each thread that moves to a new node would directly migrate pages of the shared address space, including pages that threads on other nodes are still using, whereas lazy migration only unmaps the pages and leaves any actual migration to the next fault on each page. [Perhaps I should change this flag to 'auto_migrate_direct' and default that to disabled?] This patch depends on the "migrate-on-fault" patch series that defines the MPOL_MF_LAZY flag and the migrate_pages_unmap_only() function. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx> include/linux/auto-migrate.h | 19 ++++++++++++++++++ kernel/cpuset.c | 44 ++++++++++++++++++++++++++++++++++++++++++- mm/mempolicy.c | 8 ++++++- 3 files changed, 69 insertions(+), 2 deletions(-) Index: linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/mempolicy.c +++ linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c @@ -1092,7 +1092,10 @@ static int migrate_to_node(struct mm_str return PTR_ERR(vma); if (!list_empty(&pagelist)) { - err = migrate_pages(&pagelist, new_node_page, dest, 0); + if (is_lazy_migration(flags)) + err = migrate_pages_unmap_only(&pagelist); + else + err = migrate_pages(&pagelist, new_node_page, dest, 0); if (err) putback_lru_pages(&pagelist); } @@ -1260,6 +1263,9 @@ void auto_migrate_task_memory(void) */ BUG_ON(!mm); + if (auto_migrate_lazy(current)) + set_lazy_migration(flags); + /* * Pass destination node as source node plus 'INVERT flag: * Migrate all pages NOT on destination node. 
Index: linux-2.6.36-mmotm-101103-1217/include/linux/auto-migrate.h =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/include/linux/auto-migrate.h +++ linux-2.6.36-mmotm-101103-1217/include/linux/auto-migrate.h @@ -81,6 +81,17 @@ static inline unsigned int migrate_max_m { return task->migrate_max_mapcount; } + +extern unsigned int auto_migrate_lazy(struct task_struct *); + +#ifdef MPOL_MF_LAZY +#define is_lazy_migration(F) ((F) & MPOL_MF_LAZY) +#define set_lazy_migration(F) (F) |= MPOL_MF_LAZY +#else +#define is_lazy_migration(F) (0) +#define set_lazy_migration(F) +#endif + #else /* !CONFIG_AUTO_MIGRATION */ static inline int is_auto_migration(int flags) @@ -105,6 +116,14 @@ static inline int too_soon_for_internode return 0; } +static inline unsigned int auto_migrate_lazy(struct task_struct *) +{ + return 0; +} + +#define is_lazy_migration(F) (0) +#define set_lazy_migration(F) + #endif /* CONFIG_AUTO_MIGRATION */ #endif Index: linux-2.6.36-mmotm-101103-1217/kernel/cpuset.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/kernel/cpuset.c +++ linux-2.6.36-mmotm-101103-1217/kernel/cpuset.c @@ -140,6 +140,7 @@ typedef enum { CS_SHARED_FILE_POLICY, CS_MIGRATE_ON_FAULT, CS_AUTO_MIGRATE, + CS_LAZY_MIGRATE, } cpuset_flagbits_t; /* convenient tests for these bits */ @@ -198,8 +199,14 @@ static inline int is_auto_migrate(const return test_bit(CS_AUTO_MIGRATE, &cs->flags); } +static inline int is_auto_migrate_lazy(const struct cpuset *cs) +{ + return test_bit(CS_LAZY_MIGRATE, &cs->flags); +} + static struct cpuset top_cpuset = { - .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), + .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE) | + (1 << CS_LAZY_MIGRATE)), .auto_migrate_interval = AUTO_MIGRATE_INTERVAL_DFLT, .migrate_max_mapcount = 1, }; @@ -1596,6 +1603,7 @@ typedef enum { FILE_AUTO_MIGRATE, FILE_AUTO_MIGRATE_INTERVAL, 
FILE_MIGRATE_MAX_MAPCOUNT, + FILE_AUTO_MIGRATE_LAZY, } cpuset_filetype_t; static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) @@ -1653,6 +1661,9 @@ static int cpuset_write_u64(struct cgrou case FILE_MIGRATE_MAX_MAPCOUNT: retval = update_migrate_max_mapcount(cs, val); break; + case FILE_AUTO_MIGRATE_LAZY: + retval = update_flag(CS_LAZY_MIGRATE, cs, val); + break; default: retval = -EINVAL; break; @@ -1831,6 +1842,8 @@ static u64 cpuset_read_u64(struct cgroup return is_migrate_on_fault(cs); case FILE_AUTO_MIGRATE: return is_auto_migrate(cs); + case FILE_AUTO_MIGRATE_LAZY: + return is_auto_migrate_lazy(cs); default: BUG(); } @@ -1990,6 +2003,13 @@ static struct cftype cft_migrate_max_map .private = FILE_MIGRATE_MAX_MAPCOUNT, }; +static struct cftype cft_auto_migrate_lazy = { + .name = "auto_migrate_lazy", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_AUTO_MIGRATE_LAZY, +}; + static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) { int err; @@ -2019,6 +2039,9 @@ static int cpuset_populate(struct cgroup err = add_auto_migration_file(cont, ss, &cft_migrate_max_mapcount); if (err < 0) return err; + err = add_auto_migration_file(cont, ss, &cft_auto_migrate_lazy); + if (err < 0) + return err; /* memory_pressure_enabled is in root cpuset only */ if (!cont->parent) err = cgroup_add_file(cont, ss, @@ -2101,6 +2124,8 @@ static struct cgroup_subsys_state *cpuse set_bit(CS_MIGRATE_ON_FAULT, &cs->flags); if (is_auto_migrate(parent)) set_bit(CS_AUTO_MIGRATE, &cs->flags); + if (is_auto_migrate_lazy(parent)) + set_bit(CS_LAZY_MIGRATE, &cs->flags); set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cs->auto_migrate_interval = parent->auto_migrate_interval; cs->migrate_max_mapcount = parent->migrate_max_mapcount; @@ -2874,3 +2899,20 @@ void cpuset_task_status_allowed(struct s seq_nodemask_list(m, &task->mems_allowed); seq_printf(m, "\n"); } + +#ifdef CONFIG_AUTO_MIGRATION +unsigned int auto_migrate_lazy(struct 
task_struct *task) +{ + unsigned int lazy; + + if (task_cs(current) == &top_cpuset) { + /* Don't need rcu for top_cpuset. It's never freed. */ + lazy = is_auto_migrate_lazy(&top_cpuset); + } else { + rcu_read_lock(); + lazy = is_auto_migrate_lazy(task_cs(current)); + rcu_read_unlock(); + } + return lazy; +} +#endif -- To unsubscribe from this list: send the line "unsubscribe linux-numa" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html