Commit-ID: e9941dae870861da410835560b603d17c43dabee Gitweb: http://git.kernel.org/tip/e9941dae870861da410835560b603d17c43dabee Author: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> AuthorDate: Sat, 3 Mar 2012 17:06:25 +0100 Committer: Ingo Molnar <mingo@xxxxxxxxxx> CommitDate: Fri, 18 May 2012 08:16:21 +0200 mm/mpol: Lazy migrate a process/vma Provide simple functions to lazy migrate a process (or part thereof). These will be used to implement memory migration for NUMA process migration. Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> Cc: Suresh Siddha <suresh.b.siddha@xxxxxxxxx> Cc: Paul Turner <pjt@xxxxxxxxxx> Cc: Dan Smith <danms@xxxxxxxxxx> Cc: Bharata B Rao <bharata.rao@xxxxxxxxx> Cc: Lee Schermerhorn <Lee.Schermerhorn@xxxxxx> Cc: Christoph Lameter <cl@xxxxxxxxx> Cc: Rik van Riel <riel@xxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Link: http://lkml.kernel.org/n/tip-pdhg3wh71m4eu8xerantpvln@xxxxxxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> --- include/linux/mempolicy.h | 3 ++ mm/mempolicy.c | 84 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 0 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 578fcd0..af2983b 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -262,6 +262,9 @@ extern int vma_migratable(struct vm_area_struct *); extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +extern void lazy_migrate_vma(struct vm_area_struct *vma, int node); +extern void lazy_migrate_process(struct mm_struct *mm, int node); + #else struct mempolicy {}; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 57fbc4c..df29149 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1193,6 +1193,90 @@ static long do_mbind(unsigned long start, unsigned long len, return err; } +static nodemask_t mpol_node_mask(struct mempolicy *pol, int node) +{ + if (pol->mode == MPOL_PREFERRED) { + if (pol->flags & MPOL_F_LOCAL) + return nodemask_of_node(node); + + return nodemask_of_node(pol->v.preferred_node); + } + + return pol->v.nodes; +} + +void lazy_migrate_vma(struct vm_area_struct *vma, int node) +{ + struct mempolicy *pol = NULL; + struct mempol_walk_data data; + struct mm_walk walk; + LIST_HEAD(pagelist); + nodemask_t nmask; + + if (vma->vm_file) + return; + + if (!vma_migratable(vma)) + return; + + /* + * Obtain a more-or-less correct nodemask to find which pages we need + * to unmap so that MoF can put them right again. + * + * Not quite correct for INTERLEAVE, that would need us doing + * offset_il_node() from check_pte_entry(). + * + * Also not quite correct for task policies since we don't have a task, + * approximate by having @node function as local / task-home-node. + */ + + if (vma->vm_ops && vma->vm_ops->get_policy) + pol = vma->vm_ops->get_policy(vma, vma->vm_start); + else if (vma->vm_policy) + pol = vma->vm_policy; + + if (pol) { + nmask = mpol_node_mask(pol, node); + mpol_cond_put(pol); + + /* + * If there's an explicit policy that doesn't support MoF, skip + * this vma, there's nothing we can do about that. + */ + if (!(pol->flags & MPOL_F_MOF)) + return; + } else + nmask = nodemask_of_node(node); + + data = (struct mempol_walk_data){ + .nodes = &nmask, + .flags = MPOL_MF_MOVE | MPOL_MF_INVERT, /* move all pages not in set */ + .private = &pagelist, + .vma = vma, + }; + + walk = (struct mm_walk){ + .pte_entry = check_pte_entry, + .mm = vma->vm_mm, + .private = &data, + }; + + if (!walk_page_range(vma->vm_start, vma->vm_end, &walk)) + migrate_pages_unmap_only(&pagelist); + + putback_lru_pages(&pagelist); +} + +void lazy_migrate_process(struct mm_struct *mm, int node) +{ + struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + lazy_migrate_vma(vma, node); + up_read(&mm->mmap_sem); +} + /* * User space interface with variable sized bitmaps for nodelists. */ -- To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html