The following commit has been merged into the sched/core branch of tip: Commit-ID: c2a295bffeaf9461ecba76dc9e4780c898c94f03 Gitweb: https://git.kernel.org/tip/c2a295bffeaf9461ecba76dc9e4780c898c94f03 Author: Vincent Guittot <vincent.guittot@xxxxxxxxxx> AuthorDate: Mon, 02 Dec 2024 18:45:59 +01:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Mon, 09 Dec 2024 11:48:11 +01:00 sched/fair: Add new cfs_rq.h_nr_runnable With delayed dequeued feature, a sleeping sched_entity remains queued in the rq until its lag has elapsed. As a result, it stays also visible in the statistics that are used to balance the system and in particular the field cfs.h_nr_queued when the sched_entity is associated to a task. Create a new h_nr_runnable that tracks only queued and runnable tasks. Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@xxxxxxx> Link: https://lore.kernel.org/r/20241202174606.4074512-5-vincent.guittot@xxxxxxxxxx --- kernel/sched/debug.c | 1 + kernel/sched/fair.c | 20 ++++++++++++++++++-- kernel/sched/sched.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 08d6c2b..fd711cc 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -844,6 +844,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread = right_vruntime - left_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %d\n", "h_nr_runnable", cfs_rq->h_nr_runnable); SEQ_printf(m, " .%-30s: %d\n", "h_nr_queued", cfs_rq->h_nr_queued); SEQ_printf(m, " .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed); SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d6a9447..ed01e72 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5469,6 +5469,7 @@ static void set_delayed(struct sched_entity *se) for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); + cfs_rq->h_nr_runnable--; cfs_rq->h_nr_delayed++; if (cfs_rq_throttled(cfs_rq)) break; @@ -5481,6 +5482,7 @@ static void clear_delayed(struct sched_entity *se) for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); + cfs_rq->h_nr_runnable++; cfs_rq->h_nr_delayed--; if (cfs_rq_throttled(cfs_rq)) break; @@ -5930,7 +5932,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) struct rq *rq = rq_of(cfs_rq); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; - long queued_delta, idle_task_delta, delayed_delta, dequeue = 1; + long queued_delta, runnable_delta, idle_task_delta, delayed_delta, dequeue = 1; long rq_h_nr_queued = rq->cfs.h_nr_queued; raw_spin_lock(&cfs_b->lock); @@ -5962,6 +5964,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) rcu_read_unlock(); queued_delta = cfs_rq->h_nr_queued; + runnable_delta = cfs_rq->h_nr_runnable; idle_task_delta = cfs_rq->idle_h_nr_running; delayed_delta = cfs_rq->h_nr_delayed; for_each_sched_entity(se) { @@ -5986,6 +5989,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) idle_task_delta = cfs_rq->h_nr_queued; qcfs_rq->h_nr_queued -= queued_delta; + qcfs_rq->h_nr_runnable -= runnable_delta; qcfs_rq->idle_h_nr_running -= idle_task_delta; qcfs_rq->h_nr_delayed -= delayed_delta; @@ -6009,6 +6013,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) idle_task_delta = cfs_rq->h_nr_queued; qcfs_rq->h_nr_queued -= queued_delta; + qcfs_rq->h_nr_runnable -= runnable_delta; qcfs_rq->idle_h_nr_running -= idle_task_delta; qcfs_rq->h_nr_delayed -= delayed_delta; } @@ -6036,7 +6041,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) struct rq *rq = rq_of(cfs_rq); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; - long queued_delta, idle_task_delta, delayed_delta; + long queued_delta, runnable_delta, idle_task_delta, delayed_delta; long rq_h_nr_queued = rq->cfs.h_nr_queued; se = cfs_rq->tg->se[cpu_of(rq)]; @@ -6071,6 +6076,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) } queued_delta = cfs_rq->h_nr_queued; + runnable_delta = cfs_rq->h_nr_runnable; idle_task_delta = cfs_rq->idle_h_nr_running; delayed_delta = cfs_rq->h_nr_delayed; for_each_sched_entity(se) { @@ -6089,6 +6095,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) idle_task_delta = cfs_rq->h_nr_queued; qcfs_rq->h_nr_queued += queued_delta; + qcfs_rq->h_nr_runnable += runnable_delta; qcfs_rq->idle_h_nr_running += idle_task_delta; qcfs_rq->h_nr_delayed += delayed_delta; @@ -6107,6 +6114,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) idle_task_delta = cfs_rq->h_nr_queued; qcfs_rq->h_nr_queued += queued_delta; + qcfs_rq->h_nr_runnable += runnable_delta; qcfs_rq->idle_h_nr_running += idle_task_delta; qcfs_rq->h_nr_delayed += delayed_delta; @@ -7021,6 +7029,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) enqueue_entity(cfs_rq, se, flags); slice = cfs_rq_min_slice(cfs_rq); + if (!h_nr_delayed) + cfs_rq->h_nr_runnable++; cfs_rq->h_nr_queued++; cfs_rq->idle_h_nr_running += idle_h_nr_running; cfs_rq->h_nr_delayed += h_nr_delayed; @@ -7045,6 +7055,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) se->slice = slice; slice = cfs_rq_min_slice(cfs_rq); + if (!h_nr_delayed) + cfs_rq->h_nr_runnable++; cfs_rq->h_nr_queued++; cfs_rq->idle_h_nr_running += idle_h_nr_running; cfs_rq->h_nr_delayed += h_nr_delayed; @@ -7135,6 +7147,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) break; } + if (!h_nr_delayed) + cfs_rq->h_nr_runnable -= h_nr_queued; cfs_rq->h_nr_queued -= h_nr_queued; cfs_rq->idle_h_nr_running -= idle_h_nr_running; cfs_rq->h_nr_delayed -= h_nr_delayed; @@ -7174,6 +7188,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) se->slice = slice; slice = cfs_rq_min_slice(cfs_rq); + if (!h_nr_delayed) + cfs_rq->h_nr_runnable -= h_nr_queued; cfs_rq->h_nr_queued -= h_nr_queued; cfs_rq->idle_h_nr_running -= idle_h_nr_running; cfs_rq->h_nr_delayed -= h_nr_delayed; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b011081..869d5d3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -647,6 +647,7 @@ struct cfs_rq { struct load_weight load; unsigned int nr_running; unsigned int h_nr_queued; /* SCHED_{NORMAL,BATCH,IDLE} */ + unsigned int h_nr_runnable; /* SCHED_{NORMAL,BATCH,IDLE} */ unsigned int idle_nr_running; /* SCHED_IDLE */ unsigned int idle_h_nr_running; /* SCHED_IDLE */ unsigned int h_nr_delayed;