Gregory Price <gourry.memverge@xxxxxxxxx> writes:

> In the event of rebind, pol->nodemask can change at the same time as an
> allocation occurs. We can detect this with tsk->mems_allowed_seq and
> prevent a miscount or an allocation failure from occurring.
>
> The same thing happens in the allocators to detect failure, but this
> can prevent spurious failures in a much smaller critical section.
>
> Suggested-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
> Signed-off-by: Gregory Price <gregory.price@xxxxxxxxxxxx>
> ---
>  mm/mempolicy.c | 31 +++++++++++++++++++++++++------
>  1 file changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index d8cc3a577986..ed0d5d2d456a 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -1878,11 +1878,17 @@ bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
>  
>  static unsigned int weighted_interleave_nodes(struct mempolicy *policy)
>  {
> -	unsigned int node = current->il_prev;
> -
> -	if (!current->il_weight || !node_isset(node, policy->nodes)) {
> +	unsigned int node;
> +	unsigned int cpuset_mems_cookie;
> +
> +retry:
> +	/* to prevent miscount use tsk->mems_allowed_seq to detect rebind */
> +	cpuset_mems_cookie = read_mems_allowed_begin();
> +	node = current->il_prev;
> +	if (!node || !node_isset(node, policy->nodes)) {
            ~~~~~

!current->il_weight ?

--
Best Regards,
Huang, Ying

>  		node = next_node_in(node, policy->nodes);
> -		/* can only happen if nodemask is being rebound */
> +		if (read_mems_allowed_retry(cpuset_mems_cookie))
> +			goto retry;
>  		if (node == MAX_NUMNODES)
>  			return node;
>  		current->il_prev = node;
> @@ -1896,8 +1902,14 @@ static unsigned int weighted_interleave_nodes(struct mempolicy *policy)
>  static unsigned int interleave_nodes(struct mempolicy *policy)
>  {
>  	unsigned int nid;
> +	unsigned int cpuset_mems_cookie;
> +
> +	/* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */
> +	do {
> +		cpuset_mems_cookie = read_mems_allowed_begin();
> +		nid = next_node_in(current->il_prev, policy->nodes);
> +	} while (read_mems_allowed_retry(cpuset_mems_cookie));
>  
> -	nid = next_node_in(current->il_prev, policy->nodes);
>  	if (nid < MAX_NUMNODES)
>  		current->il_prev = nid;
>  	return nid;
> @@ -2374,6 +2386,7 @@ static unsigned long alloc_pages_bulk_array_weighted_interleave(gfp_t gfp,
>  					struct page **page_array)
>  {
>  	struct task_struct *me = current;
> +	unsigned int cpuset_mems_cookie;
>  	unsigned long total_allocated = 0;
>  	unsigned long nr_allocated = 0;
>  	unsigned long rounds;
> @@ -2391,7 +2404,13 @@ static unsigned long alloc_pages_bulk_array_weighted_interleave(gfp_t gfp,
>  	if (!nr_pages)
>  		return 0;
>  
> -	nnodes = read_once_policy_nodemask(pol, &nodes);
> +	/* read the nodes onto the stack, retry if done during rebind */
> +	do {
> +		cpuset_mems_cookie = read_mems_allowed_begin();
> +		nnodes = read_once_policy_nodemask(pol, &nodes);
> +	} while (read_mems_allowed_retry(cpuset_mems_cookie));
> +
> +	/* if the nodemask has become invalid, we cannot do anything */
>  	if (!nnodes)
>  		return 0;
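
For anyone reading along who is not familiar with the cpuset API used in the
patch: read_mems_allowed_begin() samples current->mems_allowed_seq, and
read_mems_allowed_retry() reports whether a rebind raced with the read, in
which case the snapshot is simply thrown away and re-taken. Below is a
minimal sketch of that pattern in isolation; snapshot_policy_nodes() is a
hypothetical helper for illustration, not code from this patch.

	/*
	 * Illustrative sketch only (not from the patch): take a consistent
	 * snapshot of a mempolicy nodemask under the cpuset mems_allowed
	 * seqcount, retrying if a rebind overlapped the read.
	 */
	#include <linux/cpuset.h>
	#include <linux/mempolicy.h>
	#include <linux/nodemask.h>

	static void snapshot_policy_nodes(struct mempolicy *policy,
					  nodemask_t *nodes)
	{
		unsigned int cookie;

		do {
			/* sample current->mems_allowed_seq */
			cookie = read_mems_allowed_begin();
			/* may be rewritten concurrently by a rebind */
			*nodes = policy->nodes;
			/* true if a rebind raced with the copy above */
		} while (read_mems_allowed_retry(cookie));
	}

In the common case the loop body runs once; it only repeats when a writer
actually bumped the seqcount between begin and retry, which is the same
trade-off the allocators already make, just over a smaller window.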