+ numa-mempolicy-allow-tunable-policy-for-system-init.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     numa: mempolicy: Allow tunable policy for system init
has been added to the -mm tree.  Its filename is
     numa-mempolicy-allow-tunable-policy-for-system-init.patch

*** Remember to use Documentation/SubmitChecklist when testing your code ***

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: numa: mempolicy: Allow tunable policy for system init
From: Paul Mundt <lethal@xxxxxxxxxxxx>

The current default behaviour for system init (via numa_policy_init()) is
to use MPOL_INTERLEAVE across the online nodes in order to avoid a
preference for node 0.  This tends to be undesirable for systems with
small nodes that would rather keep as many allocations on node 0 as
possible.

As tmpfs already provides a parser for the policy and nodelist --
shmem_parse_mpol() -- we generalize it and hook into it via an mpolinit=
(for lack of a better name) setup param.  Other code that wishes to do
mempolicy parsing for itself can use the new mpol_parse_options().

As an example, for small nodes, one might prefer to boot with
'mpolinit=prefer:0'.  numa_default_policy() will still override this with
MPOL_DEFAULT later on anyway, so this is only useful for system init.

Signed-off-by: Paul Mundt <lethal@xxxxxxxxxxxx>
Cc: Hugh Dickins <hugh@xxxxxxxxxxx>
Cc: Christoph Lameter <clameter@xxxxxxx>
Cc: Andi Kleen <ak@xxxxxxx>
Cc: Lee Schermerhorn <lee.schermerhorn@xxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 Documentation/kernel-parameters.txt |    6 +
 include/linux/mempolicy.h           |    8 ++
 mm/mempolicy.c                      |   81 ++++++++++++++++++++++++--
 mm/shmem.c                          |   54 -----------------
 4 files changed, 91 insertions(+), 58 deletions(-)

diff -puN Documentation/kernel-parameters.txt~numa-mempolicy-allow-tunable-policy-for-system-init Documentation/kernel-parameters.txt
--- a/Documentation/kernel-parameters.txt~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/Documentation/kernel-parameters.txt
@@ -1080,6 +1080,12 @@ and is between 256 and 4096 characters. 
 	mousedev.yres=	[MOUSE] Vertical screen resolution, used for devices
 			reporting absolute coordinates, such as tablets
 
+	mpolinit=	[KNL,NUMA]
+			Format: <policy>[:<nodelist>]
+			Sets the default memory policy to be used at system
+			init time. Defaults to MPOL_INTERLEAVE between online
+			nodes.
+
 	mpu401=		[HW,OSS]
 			Format: <io>,<irq>
 
diff -puN include/linux/mempolicy.h~numa-mempolicy-allow-tunable-policy-for-system-init include/linux/mempolicy.h
--- a/include/linux/mempolicy.h~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/include/linux/mempolicy.h
@@ -148,6 +148,8 @@ extern void mpol_rebind_task(struct task
 					const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern void mpol_fix_fork_child_flag(struct task_struct *p);
+extern int mpol_parse_options(char *value, int *policy,
+			      nodemask_t *policy_nodes);
 #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
 
 #ifdef CONFIG_CPUSETS
@@ -253,6 +255,12 @@ static inline void mpol_fix_fork_child_f
 {
 }
 
+static inline int mpol_parse_options(char *value, int *policy,
+				     nodemask_t *policy_nodes)
+{
+	return 1;
+}
+
 #define set_cpuset_being_rebound(x) do {} while (0)
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
diff -puN mm/mempolicy.c~numa-mempolicy-allow-tunable-policy-for-system-init mm/mempolicy.c
--- a/mm/mempolicy.c~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/mm/mempolicy.c
@@ -89,7 +89,7 @@
 #include <linux/migrate.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
-
+#include <linux/ctype.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
@@ -1594,9 +1594,72 @@ void mpol_free_shared_policy(struct shar
 	spin_unlock(&p->lock);
 }
 
+int mpol_parse_options(char *value, int *policy, nodemask_t *policy_nodes)
+{
+	char *nodelist = strchr(value, ':');
+	int err = 1;
+
+	if (nodelist) {
+		/* NUL-terminate policy string */
+		*nodelist++ = '\0';
+		if (nodelist_parse(nodelist, *policy_nodes))
+			goto out;
+	}
+	if (!strcmp(value, "default")) {
+		*policy = MPOL_DEFAULT;
+		/* Don't allow a nodelist */
+		if (!nodelist)
+			err = 0;
+	} else if (!strcmp(value, "prefer")) {
+		*policy = MPOL_PREFERRED;
+		/* Insist on a nodelist of one node only */
+		if (nodelist) {
+			char *rest = nodelist;
+			while (isdigit(*rest))
+				rest++;
+			if (!*rest)
+				err = 0;
+		}
+	} else if (!strcmp(value, "bind")) {
+		*policy = MPOL_BIND;
+		/* Insist on a nodelist */
+		if (nodelist)
+			err = 0;
+	} else if (!strcmp(value, "interleave")) {
+		*policy = MPOL_INTERLEAVE;
+		/* Default to nodes online if no nodelist */
+		if (!nodelist)
+			*policy_nodes = node_online_map;
+		err = 0;
+	}
+out:
+	/* Restore string for error message */
+	if (nodelist)
+		*--nodelist = ':';
+	return err;
+}
+
+/* Set interleaving policy for system init. This way not all
+   the data structures allocated at system boot end up in node zero. */
+static nodemask_t nmask_sysinit __initdata;
+static int policy_sysinit __initdata = MPOL_INTERLEAVE;
+
+static int __init setup_mpol_sysinit(char *str)
+{
+	if (mpol_parse_options(str, &policy_sysinit, &nmask_sysinit)) {
+		printk("mpolinit failed, falling back on interleave\n");
+		return 0;
+	}
+
+	return 1;
+}
+__setup("mpolinit=", setup_mpol_sysinit);
+
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
+	nodemask_t *nmask;
+
 	policy_cache = kmem_cache_create("numa_policy",
 					 sizeof(struct mempolicy),
 					 0, SLAB_PANIC, NULL, NULL);
@@ -1605,11 +1668,19 @@ void __init numa_policy_init(void)
 				     sizeof(struct sp_node),
 				     0, SLAB_PANIC, NULL, NULL);
 
-	/* Set interleaving policy for system init. This way not all
-	   the data structures allocated at system boot end up in node zero. */
+	/*
+	 * Use the specified nodemask for init, or fall back to
+	 * node_online_map.
+	 */
+	if (policy_sysinit == MPOL_DEFAULT)
+		nmask = NULL;
+	else if (!nodes_empty(nmask_sysinit))
+		nmask = &nmask_sysinit;
+	else
+		nmask = &node_online_map;
 
-	if (do_set_mempolicy(MPOL_INTERLEAVE, &node_online_map))
-		printk("numa_policy_init: interleaving failed\n");
+	if (do_set_mempolicy(policy_sysinit, nmask))
+		printk("numa_policy_init: setting init policy failed\n");
 }
 
 /* Reset policy of current process to default */
diff -puN mm/shmem.c~numa-mempolicy-allow-tunable-policy-for-system-init mm/shmem.c
--- a/mm/shmem.c~numa-mempolicy-allow-tunable-policy-for-system-init
+++ a/mm/shmem.c
@@ -958,53 +958,6 @@ redirty:
 }
 
 #ifdef CONFIG_NUMA
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
-{
-	char *nodelist = strchr(value, ':');
-	int err = 1;
-
-	if (nodelist) {
-		/* NUL-terminate policy string */
-		*nodelist++ = '\0';
-		if (nodelist_parse(nodelist, *policy_nodes))
-			goto out;
-		if (!nodes_subset(*policy_nodes, node_online_map))
-			goto out;
-	}
-	if (!strcmp(value, "default")) {
-		*policy = MPOL_DEFAULT;
-		/* Don't allow a nodelist */
-		if (!nodelist)
-			err = 0;
-	} else if (!strcmp(value, "prefer")) {
-		*policy = MPOL_PREFERRED;
-		/* Insist on a nodelist of one node only */
-		if (nodelist) {
-			char *rest = nodelist;
-			while (isdigit(*rest))
-				rest++;
-			if (!*rest)
-				err = 0;
-		}
-	} else if (!strcmp(value, "bind")) {
-		*policy = MPOL_BIND;
-		/* Insist on a nodelist */
-		if (nodelist)
-			err = 0;
-	} else if (!strcmp(value, "interleave")) {
-		*policy = MPOL_INTERLEAVE;
-		/* Default to nodes online if no nodelist */
-		if (!nodelist)
-			*policy_nodes = node_online_map;
-		err = 0;
-	}
-out:
-	/* Restore string for error message */
-	if (nodelist)
-		*--nodelist = ':';
-	return err;
-}
-
 static struct page *shmem_swapin_async(struct shared_policy *p,
 				       swp_entry_t entry, unsigned long idx)
 {
@@ -1057,11 +1010,6 @@ shmem_alloc_page(gfp_t gfp, struct shmem
 	return page;
 }
 #else
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
-{
-	return 1;
-}
-
 static inline struct page *
 shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
 {
@@ -2151,7 +2099,7 @@ static int shmem_parse_options(char *opt
 			if (*rest)
 				goto bad_val;
 		} else if (!strcmp(this_char,"mpol")) {
-			if (shmem_parse_mpol(value,policy,policy_nodes))
+			if (mpol_parse_options(value,policy,policy_nodes))
 				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
_

Patches currently in -mm which might be from lethal@xxxxxxxxxxxx are

slab-fix-alien-cache-handling.patch
potential-parse-error-in-ifdef-part-3.patch
lots-of-architectures-enable-arbitary-speed-tty-support.patch
git-sh.patch
numa-mempolicy-allow-tunable-policy-for-system-init.patch
numa-mempolicy-allow-tunable-policy-for-system-init-fix.patch
pvr2fb-fix-pseudo_palette-array-overrun-and-typecast.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux