On Thu, Jul 25, 2013 at 12:46:33PM +0200, Peter Zijlstra wrote: > @@ -2234,12 +2236,13 @@ static void sp_free(struct sp_node *n) > * Policy determination "mimics" alloc_page_vma(). > * Called from fault path where we know the vma and faulting address. > */ > -int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr) > +int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr, int *account_node) > { > struct mempolicy *pol; > struct zone *zone; > int curnid = page_to_nid(page); > unsigned long pgoff; > + int thisnid = numa_node_id(); > int polnid = -1; > int ret = -1; > > @@ -2261,7 +2264,7 @@ int mpol_misplaced(struct page *page, st > > case MPOL_PREFERRED: > if (pol->flags & MPOL_F_LOCAL) > - polnid = numa_node_id(); > + polnid = thisnid; > else > polnid = pol->v.preferred_node; > break; > @@ -2276,7 +2279,7 @@ int mpol_misplaced(struct page *page, st > if (node_isset(curnid, pol->v.nodes)) > goto out; > (void)first_zones_zonelist( > - node_zonelist(numa_node_id(), GFP_HIGHUSER), > + node_zonelist(thisnid, GFP_HIGHUSER), > gfp_zone(GFP_HIGHUSER), > &pol->v.nodes, &zone); > polnid = zone->node; > @@ -2291,8 +2294,7 @@ int mpol_misplaced(struct page *page, st > int last_nidpid; > int this_nidpid; > > - polnid = numa_node_id(); > - this_nidpid = nid_pid_to_nidpid(polnid, current->pid);; > + this_nidpid = nid_pid_to_nidpid(thisnid, current->pid);; > > /* > * Multi-stage node selection is used in conjunction > @@ -2318,6 +2320,39 @@ int mpol_misplaced(struct page *page, st > last_nidpid = page_nidpid_xchg_last(page, this_nidpid); > if (!nidpid_pid_unset(last_nidpid) && nidpid_to_nid(last_nidpid) != polnid) That should've become: if (!nidpid_pid_unset(last_nidpid) && nidpid_to_nid(last_nidpid) != thisnid) > goto out; > + > + /* > + * Preserve interleave pages while allowing useful > + * ->numa_faults[] statistics. 
> + * > + * When migrating into an interleave set, migrate to > + * the correct interleaved node but account against the > + * current node (where the task is running). > + * > + * Not doing this would result in ->numa_faults[] being > + * flat across the interleaved nodes, making it > + * impossible to shrink the node list even when all > + * tasks are running on a single node. > + * > + * src dst migrate account > + * 0 0 -- this_node $page_node > + * 0 1 -- policy_node this_node > + * 1 0 -- this_node $page_node > + * 1 1 -- policy_node this_node > + * > + */ > + switch (pol->mode) { > + case MPOL_INTERLEAVE: > + if (node_isset(thisnid, pol->v.nodes)) { > + if (account_node) > + *account_node = thisnid; > + } > + break; > + > + default: > + polnid = thisnid; > + break; > + } > } > > if (curnid != polnid) -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>