On Thu, Aug 12, 2021 at 10:31 PM Josh Don <joshdon@xxxxxxxxxx> wrote: > > /proc/uptime reports idle time by reading the CPUTIME_IDLE field from > the per-cpu kcpustats. However, on NO_HZ systems, idle time is not > continually updated on idle cpus, leading this value to appear > incorrectly small. > > /proc/stat performs an accounting update when reading idle time; we can > use the same approach for uptime. > > With this patch, /proc/stat and /proc/uptime now agree on idle time. > Additionally, the following shows idle time tick up consistently on an > idle machine: > (while true; do cat /proc/uptime; sleep 1; done) | awk '{print $2-prev; prev=$2}' > > Reported-by: Luigi Rizzo <lrizzo@xxxxxxxxxx> > Signed-off-by: Josh Don <joshdon@xxxxxxxxxx> > --- > fs/proc/stat.c | 26 -------------------------- > fs/proc/uptime.c | 13 ++++++++----- > include/linux/kernel_stat.h | 1 + > kernel/sched/cputime.c | 28 ++++++++++++++++++++++++++++ > 4 files changed, 37 insertions(+), 31 deletions(-) > > diff --git a/fs/proc/stat.c b/fs/proc/stat.c > index 6561a06ef905..99796a8a5223 100644 > --- a/fs/proc/stat.c > +++ b/fs/proc/stat.c > @@ -24,16 +24,6 @@ > > #ifdef arch_idle_time > > -static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) > -{ > - u64 idle; > - > - idle = kcs->cpustat[CPUTIME_IDLE]; > - if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) > - idle += arch_idle_time(cpu); > - return idle; > -} > - > static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) > { > u64 iowait; > @@ -46,22 +36,6 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) > > #else > > -static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) > -{ > - u64 idle, idle_usecs = -1ULL; > - > - if (cpu_online(cpu)) > - idle_usecs = get_cpu_idle_time_us(cpu, NULL); > - > - if (idle_usecs == -1ULL) > - /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ > - idle = kcs->cpustat[CPUTIME_IDLE]; > - else > - idle = idle_usecs * NSEC_PER_USEC; > - > - return idle; > -} > - > static 
u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) > { > u64 iowait, iowait_usecs = -1ULL; ... > diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c > index 872e481d5098..9d7629e21164 100644 > --- a/kernel/sched/cputime.c > +++ b/kernel/sched/cputime.c > @@ -227,6 +227,34 @@ void account_idle_time(u64 cputime) > cpustat[CPUTIME_IDLE] += cputime; > } > > +/* > + * Returns the total idle time for the given cpu. > + * @kcs: The kernel_cpustat for the desired cpu. > + * @cpu: The desired cpu. > + */ > +u64 get_idle_time(const struct kernel_cpustat *kcs, int cpu) > +{ > + u64 idle; > + u64 __maybe_unused idle_usecs = -1ULL; > + > +#ifdef arch_idle_time > + idle = kcs->cpustat[CPUTIME_IDLE]; > + if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) > + idle += arch_idle_time(cpu); > +#else > + if (cpu_online(cpu)) > + idle_usecs = get_cpu_idle_time_us(cpu, NULL); > + > + if (idle_usecs == -1ULL) > + /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ > + idle = kcs->cpustat[CPUTIME_IDLE]; > + else > + idle = idle_usecs * NSEC_PER_USEC; > +#endif > + > + return idle; > +} > + > Not sure why you moved get_idle_time() into kernel/sched/cputime.c. For builds where CONFIG_PROC_FS is not set, this function is not used/needed.