From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx> psi's memstall time is counted as simple as exit - entry so far, which ignore the task's off cpu time. Fix it by calc the percentage of off time via task and rq's util and runq load. Signed-off-by: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx> --- kernel/sched/psi.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index cc25a3c..6812c7e 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -182,6 +182,8 @@ struct psi_group psi_system = { static void psi_avgs_work(struct work_struct *work); +static unsigned long psi_memtime_fixup(u32 growth); + static void group_init(struct psi_group *group) { int cpu; @@ -492,6 +494,23 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) return growth; } +static unsigned long psi_memtime_fixup(u32 growth) +{ + struct rq *rq = task_rq(current); + unsigned long growth_fixed = (unsigned long)growth; + + if !(current->policy == SCHED_NORMAL || current->policy == SCHED_BATCH) + return growth_fixed; + + if ((current->in_memstall) + && (current->se.avg.util_avg <= rq->cfs.avg.util_avg) + && current->se.avg.util_avg != 0) + growth_fixed = div64_ul((current->se.avg.util_avg + 1) * growth, + rq->cfs.avg.util_avg + rq->avg_rt.util_avg + 1); + + return growth_fixed; +} + static void init_triggers(struct psi_group *group, u64 now) { struct psi_trigger *t; @@ -658,6 +677,7 @@ static void record_times(struct psi_group_cpu *groupc, u64 now) } if (groupc->state_mask & (1 << PSI_MEM_SOME)) { + delta = psi_memtime_fixup(delta); groupc->times[PSI_MEM_SOME] += delta; if (groupc->state_mask & (1 << PSI_MEM_FULL)) groupc->times[PSI_MEM_FULL] += delta; @@ -928,8 +948,8 @@ void psi_memstall_leave(unsigned long *flags) */ rq = this_rq_lock_irq(&rf); - current->in_memstall = 0; psi_task_change(current, TSK_MEMSTALL, 0); + current->in_memstall = 0; rq_unlock_irq(rq, &rf); } -- 1.9.1