The following commit has been merged into the sched/core branch of tip: Commit-ID: 311b293811a31929c72c790eff48cf767561589f Gitweb: https://git.kernel.org/tip/311b293811a31929c72c790eff48cf767561589f Author: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx> AuthorDate: Wed, 03 Mar 2021 11:46:56 +08:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Thu, 04 Mar 2021 09:56:01 +01:00 psi: Add PSI_CPU_FULL state The FULL state doesn't exist for the CPU resource at the system level, but exist at the cgroup level, means all non-idle tasks in a cgroup are delayed on the CPU resource which used by others outside of the cgroup or throttled by the cgroup cpu.max configuration. Co-developed-by: Muchun Song <songmuchun@xxxxxxxxxxxxx> Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx> Signed-off-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx> Link: https://lkml.kernel.org/r/20210303034659.91735-2-zhouchengming@xxxxxxxxxxxxx --- include/linux/psi_types.h | 3 ++- kernel/sched/psi.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index b95f321..0a23300 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -50,9 +50,10 @@ enum psi_states { PSI_MEM_SOME, PSI_MEM_FULL, PSI_CPU_SOME, + PSI_CPU_FULL, /* Only per-CPU, to weigh the CPU in the global average: */ PSI_NONIDLE, - NR_PSI_STATES = 6, + NR_PSI_STATES = 7, }; enum psi_aggregators { diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 967732c..2293c45 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -34,7 +34,10 @@ * delayed on that resource such that nobody is advancing and the CPU * goes idle. This leaves both workload and CPU unproductive. * - * (Naturally, the FULL state doesn't exist for the CPU resource.) + * Naturally, the FULL state doesn't exist for the CPU resource at the + * system level, but exist at the cgroup level, means all non-idle tasks + * in a cgroup are delayed on the CPU resource which used by others outside + * of the cgroup or throttled by the cgroup cpu.max configuration. * * SOME = nr_delayed_tasks != 0 * FULL = nr_delayed_tasks != 0 && nr_running_tasks == 0 @@ -225,6 +228,8 @@ static bool test_state(unsigned int *tasks, enum psi_states state) return tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]; case PSI_CPU_SOME: return tasks[NR_RUNNING] > tasks[NR_ONCPU]; + case PSI_CPU_FULL: + return tasks[NR_RUNNING] && !tasks[NR_ONCPU]; case PSI_NONIDLE: return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] || tasks[NR_RUNNING]; @@ -678,8 +683,11 @@ static void record_times(struct psi_group_cpu *groupc, int cpu, } } - if (groupc->state_mask & (1 << PSI_CPU_SOME)) + if (groupc->state_mask & (1 << PSI_CPU_SOME)) { groupc->times[PSI_CPU_SOME] += delta; + if (groupc->state_mask & (1 << PSI_CPU_FULL)) + groupc->times[PSI_CPU_FULL] += delta; + } if (groupc->state_mask & (1 << PSI_NONIDLE)) groupc->times[PSI_NONIDLE] += delta; @@ -1018,7 +1026,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) group->avg_next_update = update_averages(group, now); mutex_unlock(&group->avgs_lock); - for (full = 0; full < 2 - (res == PSI_CPU); full++) { + for (full = 0; full < 2; full++) { unsigned long avg[3]; u64 total; int w;