[PATCH 11/15] Mempolicy: Use MPOL_F_LOCAL to Indicate Preferred Local Policy

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



PATCH 11/15 Mempolicy: use MPOL_F_LOCAL to indicate preferred local policy

Against:  2.6.25-rc8-mm1

Now that we're using "preferred local" policy for system default,
we need to make this as fast as possible.  Because of the variable
size of the mempolicy structure [based on size of nodemasks], the
preferred_node may be in a different cacheline from the mode.  This
can result in accessing an extra cacheline in the normal case of
system default policy.  I suspect this is the cause of an observed
2-3% slowdown in page fault testing relative to a kernel without this
patch series.

To alleviate this, use an internal mode flag, MPOL_F_LOCAL in the
mempolicy flags member which is guaranteed [?] to be in the same
cacheline as the mode itself.

Verified that the reworked mempolicy now performs slightly better on
2.6.25-rc8-mm1 for both anon and shmem segments with system default and
vma [preferred local] policy.

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@xxxxxx>

 Documentation/vm/numa_memory_policy.txt |   11 +++----
 include/linux/mempolicy.h               |    1 
 mm/mempolicy.c                          |   45 ++++++++++++++------------------
 3 files changed, 27 insertions(+), 30 deletions(-)

Index: linux-2.6.25-rc8-mm1/include/linux/mempolicy.h
===================================================================
--- linux-2.6.25-rc8-mm1.orig/include/linux/mempolicy.h	2008-04-02 17:47:26.000000000 -0400
+++ linux-2.6.25-rc8-mm1/include/linux/mempolicy.h	2008-04-02 17:48:32.000000000 -0400
@@ -50,6 +50,7 @@ enum {
  * are never OR'ed into the mode in mempolicy API arguments.
  */
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
+#define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 
 #ifdef __KERNEL__
 
Index: linux-2.6.25-rc8-mm1/mm/mempolicy.c
===================================================================
--- linux-2.6.25-rc8-mm1.orig/mm/mempolicy.c	2008-04-02 17:47:41.000000000 -0400
+++ linux-2.6.25-rc8-mm1/mm/mempolicy.c	2008-04-02 17:51:58.000000000 -0400
@@ -110,7 +110,7 @@ enum zone_type policy_zone = 0;
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
 	.mode = MPOL_PREFERRED,
-	.v =  { .preferred_node =  -1 },
+	.flags = MPOL_F_LOCAL,
 };
 
 static const struct mempolicy_operations {
@@ -163,7 +163,7 @@ static int mpol_new_interleave(struct me
 static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
 {
 	if (!nodes)
-		pol->v.preferred_node = -1;	/* local allocation */
+		pol->flags |= MPOL_F_LOCAL;	/* local allocation */
 	else if (nodes_empty(*nodes))
 		return -EINVAL;			/*  no allowed nodes */
 	else
@@ -290,14 +290,15 @@ static void mpol_rebind_preferred(struct
 	if (pol->flags & MPOL_F_STATIC_NODES) {
 		int node = first_node(pol->w.user_nodemask);
 
-		if (node_isset(node, *nodes))
+		if (node_isset(node, *nodes)) {
 			pol->v.preferred_node = node;
-		else
-			pol->v.preferred_node = -1;
+			pol->flags &= ~MPOL_F_LOCAL;
+		} else
+			pol->flags |= MPOL_F_LOCAL;
 	} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
 		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
 		pol->v.preferred_node = first_node(tmp);
-	} else if (pol->v.preferred_node != -1) {
+	} else if (!(pol->flags & MPOL_F_LOCAL)) {
 		pol->v.preferred_node = node_remap(pol->v.preferred_node,
 						   pol->w.cpuset_mems_allowed,
 						   *nodes);
@@ -645,7 +646,7 @@ static void get_policy_nodemask(struct m
 		*nodes = p->v.nodes;
 		break;
 	case MPOL_PREFERRED:
-		if (p->v.preferred_node >= 0)
+		if (!(p->flags & MPOL_F_LOCAL))
 			node_set(p->v.preferred_node, *nodes);
 		/* else return empty node mask for local allocation */
 		break;
@@ -1324,13 +1325,12 @@ static nodemask_t *policy_nodemask(gfp_t
 /* Return a zonelist indicated by gfp for node representing a mempolicy */
 static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
 {
-	int nd;
+	int nd = numa_node_id();
 
 	switch (policy->mode) {
 	case MPOL_PREFERRED:
-		nd = policy->v.preferred_node;
-		if (nd < 0)
-			nd = numa_node_id();
+		if (!(policy->flags & MPOL_F_LOCAL))
+			nd = policy->v.preferred_node;
 		break;
 	case MPOL_BIND:
 		/*
@@ -1339,16 +1339,13 @@ static struct zonelist *policy_zonelist(
 		 * current node is part of the mask, we use the zonelist for
 		 * the first node in the mask instead.
 		 */
-		nd = numa_node_id();
 		if (unlikely(gfp & __GFP_THISNODE) &&
 				unlikely(!node_isset(nd, policy->v.nodes)))
 			nd = first_node(policy->v.nodes);
 		break;
 	case MPOL_INTERLEAVE: /* should not happen */
-		nd = numa_node_id();
 		break;
 	default:
-		nd = 0;
 		BUG();
 	}
 	return node_zonelist(nd, gfp);
@@ -1379,14 +1376,15 @@ static unsigned interleave_nodes(struct 
  */
 unsigned slab_node(struct mempolicy *policy)
 {
-	if (!policy)
+	if (!policy || policy->flags & MPOL_F_LOCAL)
 		return numa_node_id();
 
 	switch (policy->mode) {
 	case MPOL_PREFERRED:
-		if (unlikely(policy->v.preferred_node >= 0))
-			return policy->v.preferred_node;
-		return numa_node_id();
+		/*
+		 * handled MPOL_F_LOCAL above
+		 */
+		return policy->v.preferred_node;
 
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
@@ -1667,7 +1665,8 @@ int __mpol_equal(struct mempolicy *a, st
 	case MPOL_INTERLEAVE:
 		return nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
-		return a->v.preferred_node == b->v.preferred_node;
+		return a->v.preferred_node == b->v.preferred_node &&
+			a->flags == b->flags;
 	default:
 		BUG();
 		return 0;
@@ -1947,7 +1946,7 @@ void numa_default_policy(void)
 }
 
 /*
- * "local" is pseudo-policy:  MPOL_PREFERRED with preferred_node == -1
+ * "local" is pseudo-policy:  MPOL_PREFERRED with MPOL_F_LOCAL flag
  * Used only for mpol_to_str()
  */
 #define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
@@ -1963,7 +1962,6 @@ static inline int mpol_to_str(char *buff
 {
 	char *p = buffer;
 	int l;
-	int nid;
 	nodemask_t nodes;
 	unsigned short mode;
 	unsigned short flags = pol ? pol->flags : 0;
@@ -1980,11 +1978,10 @@ static inline int mpol_to_str(char *buff
 
 	case MPOL_PREFERRED:
 		nodes_clear(nodes);
-		nid = pol->v.preferred_node;
-		if (nid < 0)
+		if (flags & MPOL_F_LOCAL)
 			mode = MPOL_LOCAL;	/* pseudo-policy */
 		else
-			node_set(nid, nodes);
+			node_set(pol->v.preferred_node, nodes);
 		break;
 
 	case MPOL_BIND:
Index: linux-2.6.25-rc8-mm1/Documentation/vm/numa_memory_policy.txt
===================================================================
--- linux-2.6.25-rc8-mm1.orig/Documentation/vm/numa_memory_policy.txt	2008-04-02 17:47:37.000000000 -0400
+++ linux-2.6.25-rc8-mm1/Documentation/vm/numa_memory_policy.txt	2008-04-02 17:47:48.000000000 -0400
@@ -176,12 +176,11 @@ Components of Memory Policies
 	containing the cpu where the allocation takes place.
 
 	    Internally, the Preferred policy uses a single node--the
-	    preferred_node member of struct mempolicy.  A "distinguished
-	    value of this preferred_node, currently '-1', is interpreted
-	    as "the node containing the cpu where the allocation takes
-	    place"--local allocation.  "Local" allocation policy can be
-	    viewed as a Preferred policy that starts at the node containing
-	    the cpu where the allocation takes place.
+	    preferred_node member of struct mempolicy.  When the internal
+	    mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
+	    the policy is interpreted as local allocation.  "Local" allocation
+	    policy can be viewed as a Preferred policy that starts at the node
+	    containing the cpu where the allocation takes place.
 
 	    It is possible for the user to specify that local allocation is
 	    always preferred by passing an empty nodemask with this mode.
--
To unsubscribe from this list: send the line "unsubscribe linux-numa" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]     [Devices]

  Powered by Linux