See comments in the code and previous commit messages for details of
implementation and usage.

Fix whitespace while here.

Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Signed-off-by: Ben Widawsky <ben.widawsky@xxxxxxxxx>
---
 .../admin-guide/mm/numa_memory_policy.rst | 16 ++++++++++++----
 include/uapi/linux/mempolicy.h            |  6 +++---
 mm/mempolicy.c                            | 14 ++++++--------
 mm/page_alloc.c                           |  3 ---
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index 1ad020c459b8..b69963a37fc8 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -245,6 +245,14 @@ MPOL_INTERLEAVED
 	address range or file. During system boot up, the temporary
 	interleaved system default policy works in this mode.
 
+MPOL_PREFERRED_MANY
+	This mode specifies that the allocation should be attempted from the
+	nodemask specified in the policy. If that allocation fails, the kernel
+	will search other nodes, in order of increasing distance from the first
+	set bit in the nodemask, based on information provided by the platform
+	firmware. It is similar to MPOL_PREFERRED with the main exception that
+	it is an error to have an empty nodemask.
+
 NUMA memory policy supports the following optional mode flags:
 
 MPOL_F_STATIC_NODES
@@ -253,10 +261,10 @@ MPOL_F_STATIC_NODES
 	nodes changes after the memory policy has been defined.
 
 	Without this flag, any time a mempolicy is rebound because of a
-	change in the set of allowed nodes, the node (Preferred) or
-	nodemask (Bind, Interleave) is remapped to the new set of
-	allowed nodes. This may result in nodes being used that were
-	previously undesired.
+	change in the set of allowed nodes, the preferred nodemask (Preferred
+	Many), preferred node (Preferred) or nodemask (Bind, Interleave) is
+	remapped to the new set of allowed nodes. This may result in nodes
+	being used that were previously undesired.
 
 	With this flag, if the user-specified nodes overlap with the
 	nodes allowed by the task's cpuset, then the memory policy is
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 3354774af61e..ad3eee651d4e 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -16,13 +16,13 @@
  */
 
 /* Policies */
-enum {
-	MPOL_DEFAULT,
+enum { MPOL_DEFAULT,
 	MPOL_PREFERRED,
 	MPOL_BIND,
 	MPOL_INTERLEAVE,
 	MPOL_LOCAL,
-	MPOL_MAX,	/* always last member of enum */
+	MPOL_PREFERRED_MANY,
+	MPOL_MAX,	/* always last member of enum */
 };
 
 /* Flags for set_mempolicy */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index fb49bea41ab8..07e916f8f6b7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -108,8 +108,6 @@
 #include "internal.h"
 
-#define MPOL_PREFERRED_MANY MPOL_MAX
-
 /* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */
 
@@ -180,7 +178,7 @@ struct mempolicy *get_task_policy(struct task_struct *p)
 static const struct mempolicy_operations {
 	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
 	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
-} mpol_ops[MPOL_MAX + 1];
+} mpol_ops[MPOL_MAX];
 
 static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
 {
@@ -385,8 +383,8 @@ static void mpol_rebind_preferred_common(struct mempolicy *pol,
 }
 
 /* MPOL_PREFERRED_MANY allows multiple nodes to be set in 'nodes' */
-static void __maybe_unused mpol_rebind_preferred_many(struct mempolicy *pol,
-						      const nodemask_t *nodes)
+static void mpol_rebind_preferred_many(struct mempolicy *pol,
+				       const nodemask_t *nodes)
 {
 	mpol_rebind_preferred_common(pol, nodes, nodes);
 }
@@ -448,7 +446,7 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 	mmap_write_unlock(mm);
 }
 
-static const struct mempolicy_operations mpol_ops[MPOL_MAX + 1] = {
+static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 	[MPOL_DEFAULT] = {
 		.rebind = mpol_rebind_default,
 	},
@@ -466,8 +464,8 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX + 1] = {
 	},
 	/* MPOL_LOCAL is converted to MPOL_PREFERRED on policy creation */
 	[MPOL_PREFERRED_MANY] = {
-		.create = NULL,
-		.rebind = NULL,
+		.create = mpol_new_preferred_many,
+		.rebind = mpol_rebind_preferred_many,
 	},
 };
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0f90419fe0d8..b89c9c2637bf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4867,9 +4867,6 @@ struct zonelist *preferred_zonelist(gfp_t gfp_mask, const nodemask_t *prefmask,
 	nodemask_t pref;
 	int nid, local_node = numa_mem_id();
 
-	/* Multi nodes not supported yet */
-	VM_BUG_ON(prefmask && nodes_weight(*prefmask) != 1);
-
 #define _isset(mask, node)						\
 	(!(mask) || nodes_empty(*(mask)) ? 1 : node_isset(node, *(mask)))
 	/*
-- 
2.27.0
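
For anyone wanting to try this out, below is a minimal, illustrative userspace
sketch (not part of the patch) of how a task could opt in to the new policy.
It goes through the raw set_mempolicy() syscall and defines
MPOL_PREFERRED_MANY locally, since uapi headers that predate this series do
not carry it; the node numbers are made up.

/*
 * Illustrative only: select MPOL_PREFERRED_MANY for the calling task.
 * MPOL_PREFERRED_MANY is defined here because older uapi headers lack it;
 * node numbers below are examples, not a recommendation.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef MPOL_PREFERRED_MANY
#define MPOL_PREFERRED_MANY	5	/* value added to the uapi enum by this patch */
#endif

int main(void)
{
	unsigned long nodemask = 0;

	/* Prefer nodes 0 and 2; the kernel falls back by distance if they are full. */
	nodemask |= 1UL << 0;
	nodemask |= 1UL << 2;

	/* maxnode tells the kernel how many bits of the mask to read. */
	if (syscall(SYS_set_mempolicy, MPOL_PREFERRED_MANY, &nodemask,
		    sizeof(nodemask) * 8 + 1)) {
		perror("set_mempolicy(MPOL_PREFERRED_MANY)");
		return 1;
	}

	/* Anonymous memory faulted in from here on prefers nodes 0 and 2. */
	return 0;
}

On a kernel without this series the call fails with EINVAL (the mode is past
MPOL_MAX there), so the same snippet doubles as a cheap probe for support.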