On 10/25/2015 03:26 AM, Peter Zijlstra wrote: > On Sat, Oct 24, 2015 at 10:23:14PM -0700, Joonwoo Park wrote: >> @@ -1069,7 +1069,7 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new >> { >> lockdep_assert_held(&rq->lock); >> >> - dequeue_task(rq, p, 0); >> + dequeue_task(rq, p, DEQUEUE_MIGRATING); >> p->on_rq = TASK_ON_RQ_MIGRATING; >> set_task_cpu(p, new_cpu); >> raw_spin_unlock(&rq->lock); > >> @@ -5656,7 +5671,7 @@ static void detach_task(struct task_struct *p, struct lb_env *env) >> { >> lockdep_assert_held(&env->src_rq->lock); >> >> - deactivate_task(env->src_rq, p, 0); >> + deactivate_task(env->src_rq, p, DEQUEUE_MIGRATING); >> p->on_rq = TASK_ON_RQ_MIGRATING; >> set_task_cpu(p, env->dst_cpu); >> } > > Also note that on both sites we also set TASK_ON_RQ_MIGRATING -- albeit > late. Can't you simply set that earlier (and back to QUEUED later) and > test for task_on_rq_migrating() instead of blowing up the fastpath like > you did? > Yes, it's doable, and I also find it much simpler. Please find patch v2 below. I verified that v2 does the same job as v1 by comparing the sched_stat_wait time with the difference between the sched_switch and sched_wakeup timestamps. Thanks, Joonwoo
From 98d615d46211a90482a0f9b7204265c54bba8520 Mon Sep 17 00:00:00 2001 From: Joonwoo Park <joonwoop@xxxxxxxxxxxxxx> Date: Mon, 26 Oct 2015 16:37:47 -0700 Subject: [PATCH v2] sched: fix incorrect wait time and wait count statistics At present the scheduler resets a task's wait start timestamp when the task migrates to another rq. This misleads the scheduler itself into reporting less wait time than actual, by omitting the time spent waiting prior to migration, and a higher wait count than actual, by counting the migration as a wait end event. Both errors can be seen via trace or /proc/<pid>/sched with CONFIG_SCHEDSTATS=y. Carry forward the migrating task's wait time prior to migration and don't count migration as a wait end event, to fix these statistics errors. In order to determine whether a task is migrating, mark task->on_rq with TASK_ON_RQ_MIGRATING while dequeuing and enqueuing due to migration. To: Ingo Molnar <mingo@xxxxxxxxxx> To: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: linux-kernel@xxxxxxxxxxxxxxx Signed-off-by: Joonwoo Park <joonwoop@xxxxxxxxxxxxxx> --- Changes in v2: * Set p->on_rq = TASK_ON_RQ_MIGRATING while doing migration dequeue/enqueue and check whether the task is migrating with task_on_rq_migrating(). 
kernel/sched/core.c | 4 ++-- kernel/sched/fair.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bcd214e..d9e4ad5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1069,8 +1069,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new { lockdep_assert_held(&rq->lock); - dequeue_task(rq, p, 0); p->on_rq = TASK_ON_RQ_MIGRATING; + dequeue_task(rq, p, 0); set_task_cpu(p, new_cpu); raw_spin_unlock(&rq->lock); @@ -1078,8 +1078,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new raw_spin_lock(&rq->lock); BUG_ON(task_cpu(p) != new_cpu); - p->on_rq = TASK_ON_RQ_QUEUED; enqueue_task(rq, p, 0); + p->on_rq = TASK_ON_RQ_QUEUED; check_preempt_curr(rq, p, 0); return rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9a5e60f..7609576 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -740,7 +740,11 @@ static void update_curr_fair(struct rq *rq) static inline void update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { - schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq))); + schedstat_set(se->statistics.wait_start, + task_on_rq_migrating(task_of(se)) && + likely(rq_clock(rq_of(cfs_rq)) > se->statistics.wait_start) ? 
+ rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start : + rq_clock(rq_of(cfs_rq))); } /* @@ -759,6 +763,13 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) static void update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { + if (task_on_rq_migrating(task_of(se))) { + schedstat_set(se->statistics.wait_start, + rq_clock(rq_of(cfs_rq)) - + se->statistics.wait_start); + return; + } + schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max, rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start)); schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1); @@ -5656,8 +5667,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env) { lockdep_assert_held(&env->src_rq->lock); - deactivate_task(env->src_rq, p, 0); p->on_rq = TASK_ON_RQ_MIGRATING; + deactivate_task(env->src_rq, p, 0); set_task_cpu(p, env->dst_cpu); } @@ -5790,8 +5801,8 @@ static void attach_task(struct rq *rq, struct task_struct *p) lockdep_assert_held(&rq->lock); BUG_ON(task_rq(p) != rq); - p->on_rq = TASK_ON_RQ_QUEUED; activate_task(rq, p, 0); + p->on_rq = TASK_ON_RQ_QUEUED; check_preempt_curr(rq, p, 0); } -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, hosted by The Linux Foundation