The patch titled sched: improve migration accuracy has been added to the -mm tree. Its filename is sched-improve-migration-accuracy.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: sched: improve migration accuracy From: Mike Galbraith <efault@xxxxxx> Co-opt rq->timestamp_last_tick (renamed rq->most_recent_timestamp) to maintain the reference timestamp used for cache_hot_time evaluation, updating it at both tick and sched times. This prevents the reference from being behind task->last_ran at evaluation time, and moves it closer to the current time on the remote processor. The intent is to improve the accuracy of cache hot evaluation and of timestamp adjustment for task migration. Fix a minor sched_time double accounting error which occurs when a task passes through schedule() without being scheduled off and then takes the next timer tick. Signed-off-by: Mike Galbraith <efault@xxxxxx> Acked-by: Ingo Molnar <mingo@xxxxxxx> Acked-by: Ken Chen <kenneth.w.chen@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- kernel/sched.c | 34 ++++++++++++++++------------------ 1 files changed, 16 insertions(+), 18 deletions(-) diff -puN kernel/sched.c~sched-improve-migration-accuracy kernel/sched.c --- a/kernel/sched.c~sched-improve-migration-accuracy +++ a/kernel/sched.c @@ -225,7 +225,8 @@ struct rq { unsigned long nr_uninterruptible; unsigned long expired_timestamp; - unsigned long long timestamp_last_tick; + /* Cached timestamp set by update_cpu_clock() */ + unsigned long long most_recent_timestamp; struct task_struct *curr, *idle; unsigned long next_balance; struct mm_struct *prev_mm; @@ -944,8 +945,8 @@ static void activate_task(struct task_st if (!local) { /* Compensate for drifting sched_clock */ struct rq *this_rq = this_rq(); - now = (now - this_rq->timestamp_last_tick) - + rq->timestamp_last_tick; + now = (now - this_rq->most_recent_timestamp) + + rq->most_recent_timestamp; } 
#endif @@ -1689,8 +1690,8 @@ void fastcall wake_up_new_task(struct ta * Not the local CPU - must adjust timestamp. This should * get optimised away in the !CONFIG_SMP case. */ - p->timestamp = (p->timestamp - this_rq->timestamp_last_tick) - + rq->timestamp_last_tick; + p->timestamp = (p->timestamp - this_rq->most_recent_timestamp) + + rq->most_recent_timestamp; __activate_task(p, rq); if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); @@ -2068,8 +2069,8 @@ static void pull_task(struct rq *src_rq, set_task_cpu(p, this_cpu); inc_nr_running(p, this_rq); enqueue_task(p, this_array); - p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) - + this_rq->timestamp_last_tick; + p->timestamp = (p->timestamp - src_rq->most_recent_timestamp) + + this_rq->most_recent_timestamp; /* * Note that idle threads have a prio of MAX_PRIO, for this test * to be always true for them. @@ -2108,7 +2109,7 @@ int can_migrate_task(struct task_struct if (sd->nr_balance_failed > sd->cache_nice_tries) return 1; - if (task_hot(p, rq->timestamp_last_tick, sd)) + if (task_hot(p, rq->most_recent_timestamp, sd)) return 0; return 1; } @@ -2207,7 +2208,7 @@ skip_queue: } #ifdef CONFIG_SCHEDSTATS - if (task_hot(tmp, busiest->timestamp_last_tick, sd)) + if (task_hot(tmp, busiest->most_recent_timestamp, sd)) schedstat_inc(sd, lb_hot_gained[idle]); #endif @@ -2971,7 +2972,8 @@ EXPORT_PER_CPU_SYMBOL(kstat); static inline void update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now) { - p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick); + p->sched_time += now - p->timestamp; + p->timestamp = rq->most_recent_timestamp = now; } /* @@ -2984,8 +2986,7 @@ unsigned long long current_sched_time(co unsigned long flags; local_irq_save(flags); - ns = max(p->timestamp, task_rq(p)->timestamp_last_tick); - ns = p->sched_time + sched_clock() - ns; + ns = p->sched_time + sched_clock() - p->timestamp; local_irq_restore(flags); return ns; @@ -3176,8 +3177,6 @@ void 
scheduler_tick(void) update_cpu_clock(p, rq, now); - rq->timestamp_last_tick = now; - if (p == rq->idle) /* Task on the idle queue */ wake_priority_sleeper(rq); @@ -3498,11 +3497,10 @@ switch_tasks: prev->sleep_avg -= run_time; if ((long)prev->sleep_avg <= 0) prev->sleep_avg = 0; - prev->timestamp = prev->last_ran = now; sched_info_switch(prev, next); if (likely(prev != next)) { - next->timestamp = now; + next->timestamp = prev->last_ran = now; rq->nr_switches++; rq->curr = next; ++*switch_count; @@ -5032,8 +5030,8 @@ static int __migrate_task(struct task_st * afterwards, and pretending it was a local activate. * This way is cleaner and logically correct. */ - p->timestamp = p->timestamp - rq_src->timestamp_last_tick - + rq_dest->timestamp_last_tick; + p->timestamp = p->timestamp - rq_src->most_recent_timestamp + + rq_dest->most_recent_timestamp; deactivate_task(p, rq_src); __activate_task(p, rq_dest); if (TASK_PREEMPTS_CURR(p, rq_dest)) _ Patches currently in -mm which might be from efault@xxxxxx are remove-the-syslog-interface-when-printk-is-disabled.patch sched-improve-migration-accuracy.patch readahead-call-scheme.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html