CONFIG_NO_HZ=y can cause idle/iowait values to decrease. If /proc/stat is monitored with a short interval (e.g. 1 or 2 secs) using sysstat package, sar reports bogus %idle/iowait values because sar expects that idle/iowait values do not decrease unless wraparound happens. This patch makes idle/iowait values visible from /proc/stat increase monotonically, with an assumption that we don't need to worry about wraparound. Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> --- fs/proc/stat.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 files changed, 38 insertions(+), 4 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index e296572..9fff534 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -19,6 +19,40 @@ #define arch_irq_stat() 0 #endif +/* + * CONFIG_NO_HZ=y can cause idle/iowait values to decrease. + * Make sure that idle/iowait values visible from /proc/stat do not decrease. + */ +static inline u64 validate_iowait(u64 iowait, const int cpu) +{ +#ifdef CONFIG_NO_HZ + static u64 max_iowait[NR_CPUS]; + static DEFINE_SPINLOCK(lock); + spin_lock(&lock); + if (likely(iowait >= max_iowait[cpu])) + max_iowait[cpu] = iowait; + else + iowait = max_iowait[cpu]; + spin_unlock(&lock); +#endif + return iowait; +} + +static inline u64 validate_idle(u64 idle, const int cpu) +{ +#ifdef CONFIG_NO_HZ + static u64 max_idle[NR_CPUS]; + static DEFINE_SPINLOCK(lock); + spin_lock(&lock); + if (likely(idle >= max_idle[cpu])) + max_idle[cpu] = idle; + else + idle = max_idle[cpu]; + spin_unlock(&lock); +#endif + return idle; +} + #ifdef arch_idle_time static cputime64_t get_idle_time(int cpu) @@ -28,7 +62,7 @@ static cputime64_t get_idle_time(int cpu) idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) idle += arch_idle_time(cpu); - return idle; + return validate_idle(idle, cpu); } static cputime64_t get_iowait_time(int cpu) @@ -38,7 +72,7 @@ static cputime64_t get_iowait_time(int cpu) iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; if (cpu_online(cpu) && nr_iowait_cpu(cpu)) iowait += arch_idle_time(cpu); - return iowait; + return validate_iowait(iowait, cpu); } #else @@ -56,7 +90,7 @@ static u64 get_idle_time(int cpu) else idle = usecs_to_cputime64(idle_time); - return idle; + return validate_idle(idle, cpu); } static u64 get_iowait_time(int cpu) @@ -72,7 +106,7 @@ static u64 get_iowait_time(int cpu) else iowait = usecs_to_cputime64(iowait_time); - return iowait; + return validate_iowait(iowait, cpu); } #endif -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html