AutoPage Migration - Preparatory patch Added AUTO_MIGRATION Kconfig option that depends on MIGRATION. Conditionally compiled auto-migration features now controlled by this option. Define mempolicy.c internal flag for auto-migration. This flag will select auto-migration specific behavior in the existing page migration functions. Test this flag via helper function is_auto_migration(). Can't be static inline in header because flag is private to mempolicy.c. Add auto_migrate_task_memory() to mempolicy.c. This function sets up to call migrate_to_node() with internal flags for auto-migration. Modify vma_migratable() to skip VMAs that don't have local policy when auto-migrating. vma_migratable() now called from check_range() in mempolicy.c and do_move_pages() in migrate.c. Subsequent patches will arrange for auto_migrate_task_memory() to be called when a task returns to user space after the scheduler migrates it to a cpu on a node different from the node where it last executed. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx> include/linux/auto-migrate.h | 25 +++++++++++++++ include/linux/mempolicy.h | 13 +++++++ mm/Kconfig | 7 ++++ mm/mempolicy.c | 71 ++++++++++++++++++++++++++++++++++++++----- mm/migrate.c | 3 + 5 files changed, 110 insertions(+), 9 deletions(-) Index: linux-2.6.36-mmotm-101103-1217/mm/Kconfig =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/Kconfig +++ linux-2.6.36-mmotm-101103-1217/mm/Kconfig @@ -211,6 +211,13 @@ config MIGRATE_ON_FAULT page is not currently mapped by any tasks. This allows a task to pull unmapped pages closer to itself when enabled for that task. +config AUTO_MIGRATION + bool "Auto-migrate task memory" + depends on MIGRATION + help + Allows tasks' private memory to follow that task itself across + inter-node migrations. 
+ config PHYS_ADDR_T_64BIT def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT Index: linux-2.6.36-mmotm-101103-1217/include/linux/auto-migrate.h =================================================================== --- /dev/null +++ linux-2.6.36-mmotm-101103-1217/include/linux/auto-migrate.h @@ -0,0 +1,25 @@ +#ifndef _LINUX_AUTO_MIGRATE_H +#define _LINUX_AUTO_MIGRATE_H + +/* + * minimal memory migration definitions needed by scheduler, + * sysctl, ..., so that they don't need to drag in the entire + * migrate.h and all that it depends on. + */ + +#ifdef CONFIG_AUTO_MIGRATION + +extern int is_auto_migration(int flags); + +extern void auto_migrate_task_memory(void); + +#else /* !CONFIG_AUTO_MIGRATION */ + +static inline int is_auto_migration(int flags) +{ + return 0; +} + +#endif /* CONFIG_AUTO_MIGRATION */ + +#endif Index: linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/mempolicy.c +++ linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c @@ -91,6 +91,7 @@ #include <linux/syscalls.h> #include <linux/ctype.h> #include <linux/mm_inline.h> +#include <linux/auto-migrate.h> #include <asm/tlbflush.h> #include <asm/uaccess.h> @@ -98,10 +99,16 @@ #include "internal.h" /* Internal flags */ -#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ -#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ -#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ -#define MPOL_MF_MOVE_ANON_ONLY (MPOL_MF_INTERNAL << 3) +#define MPOL_MF_DISCONTIG_OK \ + (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ +#define MPOL_MF_INVERT \ + (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ +#define MPOL_MF_STATS \ + (MPOL_MF_INTERNAL << 2) /* Gather statistics */ +#define MPOL_MF_MOVE_ANON_ONLY \ + (MPOL_MF_INTERNAL << 3) /* migrate private, anon pages only */ +#define MPOL_MF_AUTOMIGRATE \ + (MPOL_MF_INTERNAL << 4) /* 
auto-migrating task memory */ static struct kmem_cache *policy_cache; static struct kmem_cache *sp_cache; @@ -467,8 +474,10 @@ static void migrate_page_add(struct page /* * Check whether a vma is migratable */ -int vma_migratable(struct vm_area_struct *vma) +int vma_migratable(struct vm_area_struct *vma, int flags) { + int ret = 1; + if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED)) return 0; /* @@ -480,7 +489,20 @@ int vma_migratable(struct vm_area_struct gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) < policy_zone) return 0; - return 1; + + /* + * Auto-migration: only consider vmas with local allocation policy + * NOTE: we only query the start address of the vma. For shared + * segments with multiple policy ranges, this might lie, but we'll + * live with that. + */ + if (is_auto_migration(flags)) { + struct mempolicy *pol = + get_vma_policy(current, vma, vma->vm_start); + ret = is_local_allocation(pol); + mpol_cond_put(pol); + } + return ret; } /* Scan through pages checking if pages follow certain conditions. */ @@ -627,7 +649,7 @@ check_range(struct mm_struct *mm, unsign end = (end + HPAGE_MASK) & HPAGE_MASK; } else if (((flags & MPOL_MF_STRICT) || ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && - vma_migratable(vma)))) { + vma_migratable(vma, flags)))) { unsigned long endvma = vma->vm_end; unsigned long anononly = 0; @@ -1190,6 +1212,41 @@ static struct page *new_vma_page(struct */ return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); } + +#ifdef CONFIG_AUTO_MIGRATION + +int is_auto_migration(int flags) +{ + return !!(flags & MPOL_MF_AUTOMIGRATE); +} + +/** + * auto_migrate_task_memory() + * + * Called just before returning to user state when a task has been + * migrated to a new node by the scheduler and sched_migrate_memory + * is enabled. 
+ */ +void auto_migrate_task_memory(void) +{ + struct mm_struct *mm = current->mm; + int dest = cpu_to_node(task_cpu(current)); + int flags = MPOL_MF_MOVE | MPOL_MF_INVERT | MPOL_MF_AUTOMIGRATE; + + /* + * we're returning to user space, so mm must be non-NULL + */ + BUG_ON(!mm); + + /* + * Pass destination node as source node plus 'INVERT flag: + * Migrate all pages NOT on destination node. + * 'AUTOMIGRATE flag selects only VMAs with default policy + */ + migrate_to_node(mm, dest, dest, flags); +} +#endif /* CONFIG_AUTO_MIGRATION */ + #else static void migrate_page_add(struct page *page, struct list_head *pagelist, Index: linux-2.6.36-mmotm-101103-1217/mm/migrate.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/migrate.c +++ linux-2.6.36-mmotm-101103-1217/mm/migrate.c @@ -35,6 +35,7 @@ #include <linux/hugetlb.h> #include <linux/gfp.h> #include <linux/vmstat.h> +#include <linux/auto-migrate.h> #include "internal.h" @@ -1156,7 +1157,7 @@ static int do_move_page_to_node_array(st err = -EFAULT; vma = find_vma(mm, pp->addr); - if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma)) + if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma, 0)) goto set_status; page = follow_page(vma, pp->addr, FOLL_GET); Index: linux-2.6.36-mmotm-101103-1217/include/linux/mempolicy.h =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/include/linux/mempolicy.h +++ linux-2.6.36-mmotm-101103-1217/include/linux/mempolicy.h @@ -234,7 +234,7 @@ extern int mpol_to_str(char *buffer, int int no_context); #endif -extern int vma_migratable(struct vm_area_struct *); +extern int vma_migratable(struct vm_area_struct *, int); struct seq_file; extern int show_numa_map(struct seq_file *, void *); @@ -249,6 +249,14 @@ extern struct mpol_range *get_numa_subma extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long, int *); +/* + * Does the argument mempolicy 
specify local allocation? + */ +static inline int is_local_allocation(struct mempolicy *mpol) +{ + return mpol->flags & MPOL_F_LOCAL; +} + #endif /* CONFIG_MIGRATE_ON_FAULT */ #else @@ -368,6 +376,9 @@ static inline int mpol_to_str(char *buff } #endif +static inline int vma_migratable(struct vm_area_struct *vma, int flags) + { return 0; } + #endif /* CONFIG_NUMA */ #endif /* __KERNEL__ */ -- To unsubscribe from this list: send the line "unsubscribe linux-numa" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html