We need to find the closest non-DRAM node to demote DRAM pages to.  Add a
"skip_ram_node" parameter to find_next_best_node() so that DRAM nodes can
be skipped on demand.

Signed-off-by: Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx>
---
 mm/internal.h   | 11 +++++++++++
 mm/page_alloc.c | 15 +++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 9eeaf2b..46ad0d8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -292,6 +292,17 @@ static inline bool is_data_mapping(vm_flags_t flags)
 	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
 }
 
+#ifdef CONFIG_NUMA
+extern int find_next_best_node(int node, nodemask_t *used_node_mask,
+			       bool skip_ram_node);
+#else
+static inline int find_next_best_node(int node, nodemask_t *used_node_mask,
+				      bool skip_ram_node)
+{
+	return 0;
+}
+#endif
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 68ad8c6..07d767b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5375,6 +5375,7 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write,
  * find_next_best_node - find the next node that should appear in a given node's fallback list
  * @node: node whose fallback list we're appending
  * @used_node_mask: nodemask_t of already used nodes
+ * @skip_ram_node: find next best non-DRAM node
  *
  * We use a number of factors to determine which is the next node that should
  * appear on a given node's fallback list.  The node should not have appeared
@@ -5386,7 +5387,8 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write,
  *
  * Return: node id of the found node or %NUMA_NO_NODE if no node is found.
  */
-static int find_next_best_node(int node, nodemask_t *used_node_mask)
+int find_next_best_node(int node, nodemask_t *used_node_mask,
+			bool skip_ram_node)
 {
 	int n, val;
 	int min_val = INT_MAX;
@@ -5394,13 +5396,19 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 	const struct cpumask *tmp = cpumask_of_node(0);
 
 	/* Use the local node if we haven't already */
-	if (!node_isset(node, *used_node_mask)) {
+	if (!node_isset(node, *used_node_mask) &&
+	    !skip_ram_node) {
 		node_set(node, *used_node_mask);
 		return node;
 	}
 
 	for_each_node_state(n, N_MEMORY) {
 
+		/* Find next best non-DRAM node */
+		if (skip_ram_node &&
+		    (node_isset(n, def_alloc_nodemask)))
+			continue;
+
 		/* Don't want a node to appear more than once */
 		if (node_isset(n, *used_node_mask))
 			continue;
@@ -5432,7 +5440,6 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 	return best_node;
 }
 
-
 /*
  * Build zonelists ordered by node and zones within node.
  * This results in maximum locality--normal zone overflows into local
@@ -5494,7 +5501,7 @@ static void build_zonelists(pg_data_t *pgdat)
 	nodes_clear(used_mask);
 	memset(node_order, 0, sizeof(node_order));
 
-	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
+	while ((node = find_next_best_node(local_node, &used_mask, false)) >= 0) {
 		/*
 		 * We don't want to pressure a particular node.
 		 * So adding penalty to the first node in same
-- 
1.8.3.1
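
For context, a minimal sketch (not part of this patch) of how a caller in mm/
might use the new skip_ram_node argument to pick a per-node demotion target.
The establish_demotion_targets() helper and node_demotion[] array are
illustrative names only, and it assumes def_alloc_nodemask (the mask of DRAM
nodes referenced by the hunk above) is visible to the caller:

#include <linux/nodemask.h>
#include <linux/numa.h>
#include "internal.h"

/* Illustrative only: per-node demotion target, NUMA_NO_NODE if none */
static int node_demotion[MAX_NUMNODES] = {
	[0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE
};

static void establish_demotion_targets(void)
{
	nodemask_t used_mask;
	int node, target;

	/* For every DRAM node, find the closest non-DRAM node */
	for_each_node_mask(node, def_alloc_nodemask) {
		nodes_clear(used_mask);
		/* skip_ram_node == true: only non-DRAM nodes qualify */
		target = find_next_best_node(node, &used_mask, true);
		if (target != NUMA_NO_NODE)
			node_demotion[node] = target;
	}
}

Because find_next_best_node() sets the found node in used_mask on success, a
caller could also keep calling it in a loop to build an ordered fallback list
of non-DRAM targets rather than just the single closest one.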