The scheduler already favours moving a task towards its preferred node
but does nothing special for any other destination node. This patch
favours moving a task towards a destination node if more NUMA hinting
faults were recorded on that node than on the source node. Similarly,
if migrating to the destination node would degrade locality based on
NUMA hinting faults, the migration is resisted.

Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Mel Gorman <mgorman@xxxxxxx>
---
 kernel/sched/fair.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c303ba6..1a4af96 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4069,24 +4069,65 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 }
 
 #ifdef CONFIG_NUMA_BALANCING
-/* Returns true if the destination node has incurred more faults */
-static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
+
+static bool migrate_locality_prepare(struct task_struct *p, struct lb_env *env,
+			int *src_nid, int *dst_nid,
+			unsigned long *src_faults, unsigned long *dst_faults)
 {
-	int src_nid, dst_nid;
+	int priv;
 
 	if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
 		return false;
 
-	src_nid = cpu_to_node(env->src_cpu);
-	dst_nid = cpu_to_node(env->dst_cpu);
+	*src_nid = cpu_to_node(env->src_cpu);
+	*dst_nid = cpu_to_node(env->dst_cpu);
 
-	if (src_nid == dst_nid ||
+	if (*src_nid == *dst_nid ||
 	    p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
 		return false;
 
+	/* Calculate private/shared faults on the two nodes */
+	*src_faults = 0;
+	*dst_faults = 0;
+	for (priv = 0; priv < 2; priv++) {
+		*src_faults += p->numa_faults[task_faults_idx(*src_nid, priv)];
+		*dst_faults += p->numa_faults[task_faults_idx(*dst_nid, priv)];
+	}
+
+	return true;
+}
+
+/* Returns true if the destination node has incurred more faults */
+static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
+{
+	int src_nid, dst_nid;
+	unsigned long src, dst;
+
+	if (!migrate_locality_prepare(p, env, &src_nid, &dst_nid, &src, &dst))
+		return false;
+
+	/* Move towards node if it is the preferred node */
 	if (p->numa_preferred_nid == dst_nid)
 		return true;
 
+	/* Move towards node if there were more NUMA hinting faults recorded */
+	if (dst > src)
+		return true;
+
+	return false;
+}
+
+static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
+{
+	int src_nid, dst_nid;
+	unsigned long src, dst;
+
+	if (!migrate_locality_prepare(p, env, &src_nid, &dst_nid, &src, &dst))
+		return false;
+
+	if (src > dst)
+		return true;
+
 	return false;
 }
 #else
@@ -4095,6 +4136,14 @@ static inline bool migrate_improves_locality(struct task_struct *p,
 {
 	return false;
 }
+
+
+static inline bool migrate_degrades_locality(struct task_struct *p,
+					     struct lb_env *env)
+{
+	return false;
+}
+
 #endif
 
 /*
@@ -4150,6 +4199,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	 * 3) too many balance attempts have failed.
 	 */
 	tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
+	if (!tsk_cache_hot)
+		tsk_cache_hot = migrate_degrades_locality(p, env);
 
 	if (migrate_improves_locality(p, env)) {
 #ifdef CONFIG_SCHEDSTATS
-- 
1.8.1.4
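
As an aside for reviewers, the decision logic above boils down to summing
the private and shared NUMA hinting faults recorded on the source and
destination nodes and comparing the two totals. The standalone userspace
sketch below is not part of the patch: fake_task, node_faults(),
improves_locality(), degrades_locality() and the fault numbers are made up
for illustration, and a 2-D faults[nid][priv] array stands in for the
kernel's flat p->numa_faults[] indexed via task_faults_idx().

/*
 * Illustration only; names and numbers are invented and do not exist in
 * the kernel tree.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_NODES	4
#define NR_PRIV		2	/* two counters per node (made-up split) */

struct fake_task {
	int preferred_nid;
	unsigned long faults[NR_NODES][NR_PRIV];
};

/* Sum both fault counters for one node, as the patch helper does */
static unsigned long node_faults(struct fake_task *t, int nid)
{
	unsigned long sum = 0;
	int priv;

	for (priv = 0; priv < NR_PRIV; priv++)
		sum += t->faults[nid][priv];
	return sum;
}

/* Mirrors migrate_improves_locality(): preferred node wins, else more faults */
static bool improves_locality(struct fake_task *t, int src_nid, int dst_nid)
{
	if (src_nid == dst_nid)
		return false;
	if (t->preferred_nid == dst_nid)
		return true;
	return node_faults(t, dst_nid) > node_faults(t, src_nid);
}

/* Mirrors migrate_degrades_locality(): resist moving to a node with fewer faults */
static bool degrades_locality(struct fake_task *t, int src_nid, int dst_nid)
{
	if (src_nid == dst_nid)
		return false;
	return node_faults(t, src_nid) > node_faults(t, dst_nid);
}

int main(void)
{
	struct fake_task t = {
		.preferred_nid = 0,
		/* node 0: 100 faults, node 1: 15 faults, nodes 2-3 unused */
		.faults = { {40, 60}, {5, 10} },
	};

	/* Moving 1 -> 0 targets the preferred, fault-heavy node: improves */
	printf("1 -> 0 improves: %d\n", improves_locality(&t, 1, 0));
	/* Moving 0 -> 1 would lose locality: resisted */
	printf("0 -> 1 degrades: %d\n", degrades_locality(&t, 0, 1));
	return 0;
}

Compiled and run, this should print 1 for both checks, i.e. a move from
node 1 to node 0 is favoured while the reverse move is treated like a
cache-hot task and resisted.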