From: Rik van Riel <riel@xxxxxxxxxx> Excessive migration of pages can hurt the performance of workloads that span multiple NUMA nodes. However, it turns out that the p->numa_migrate_deferred knob is a really big hammer, which does reduce migration rates, but does not actually help performance. It is time to rip it out, and replace it with something smarter. Signed-off-by: Rik van Riel <riel@xxxxxxxxxx> --- include/linux/sched.h | 1 - kernel/sched/fair.c | 8 -------- kernel/sysctl.c | 7 ------- mm/mempolicy.c | 45 --------------------------------------------- 4 files changed, 61 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 42f2baf..9e4cb598 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1345,7 +1345,6 @@ struct task_struct { unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; - int numa_migrate_deferred; unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ struct callback_head numa_work; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7f9b376..410858e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -794,14 +794,6 @@ unsigned int sysctl_numa_balancing_scan_size = 256; /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */ unsigned int sysctl_numa_balancing_scan_delay = 1000; -/* - * After skipping a page migration on a shared page, skip N more numa page - * migrations unconditionally. This reduces the number of NUMA migrations - * in shared memory workloads, and has the effect of pulling tasks towards - * where their memory lives, over pulling the memory towards the task. - */ -unsigned int sysctl_numa_balancing_migrate_deferred = 16; - static unsigned int task_nr_scan_windows(struct task_struct *p) { unsigned long rss = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 14c4f51..821e3f1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -392,13 +392,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "numa_balancing_migrate_deferred", - .data = &sysctl_numa_balancing_migrate_deferred, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9a2f6dd..0522aa2 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2247,35 +2247,6 @@ static void sp_free(struct sp_node *n) kmem_cache_free(sn_cache, n); } -#ifdef CONFIG_NUMA_BALANCING -static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) -{ - /* Never defer a private fault */ - if (cpupid_match_pid(p, last_cpupid)) - return false; - - if (p->numa_migrate_deferred) { - p->numa_migrate_deferred--; - return true; - } - return false; -} - -static inline void defer_numa_migrate(struct task_struct *p) -{ - p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred; -} -#else -static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) -{ - return false; -} - -static inline void defer_numa_migrate(struct task_struct *p) -{ -} -#endif /* CONFIG_NUMA_BALANCING */ - /** * mpol_misplaced - check whether current page node is valid in policy * @@ -2378,24 +2349,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long */ last_cpupid = page_cpupid_xchg_last(page, this_cpupid); if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) { - - /* See sysctl_numa_balancing_migrate_deferred comment */ - if (!cpupid_match_pid(current, last_cpupid)) - defer_numa_migrate(current); - goto out; } - - /* - * The quadratic filter above reduces extraneous migration - * of shared pages somewhat. This code reduces it even more, - * reducing the overhead of page migrations of shared pages. - * This makes workloads with shared pages rely more on - * "move task near its memory", and less on "move memory - * towards its task", which is exactly what we want. - */ - if (numa_migrate_deferred(current, last_cpupid)) - goto out; } if (curnid != polnid) -- 1.8.3.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>