The following commit has been merged into the sched/core branch of tip:

Commit-ID:     e8901061ca0cd9acbd3d29d41d16c69c2bfff9f0
Gitweb:        https://git.kernel.org/tip/e8901061ca0cd9acbd3d29d41d16c69c2bfff9f0
Author:        Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate:    Thu, 23 May 2024 10:48:09 +02:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Sat, 17 Aug 2024 11:06:42 +02:00

sched: Split DEQUEUE_SLEEP from deactivate_task()

As preparation for dequeue_task() failing, and a second code path
needing to take care of the 'success' path, split out the DEQUEUE_SLEEP
path from deactivate_task().

Many thanks to Libo for spotting and fixing a TASK_ON_RQ_MIGRATING
ordering failure.

Fixed-by: Libo Chen <libo.chen@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Tested-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240727105029.086192709@xxxxxxxxxxxxx
---
 kernel/sched/core.c  | 23 +++++++++++++----------
 kernel/sched/sched.h | 14 ++++++++++++++
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4f7a4e9..6c59548 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2036,12 +2036,23 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
 
 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
-	WRITE_ONCE(p->on_rq, (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING);
+	WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
 	ASSERT_EXCLUSIVE_WRITER(p->on_rq);
 
+	/*
+	 * Code explicitly relies on TASK_ON_RQ_MIGRATING being set *before*
+	 * dequeue_task() and cleared *after* enqueue_task().
+	 */
+
 	dequeue_task(rq, p, flags);
 }
 
+static void block_task(struct rq *rq, struct task_struct *p, int flags)
+{
+	if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
+		__block_task(rq, p);
+}
+
 /**
  * task_curr - is this task currently executing on a CPU?
  * @p: the task in question.
@@ -6498,9 +6509,6 @@ static void __sched notrace __schedule(unsigned int sched_mode)
 				!(prev_state & TASK_NOLOAD) &&
 				!(prev_state & TASK_FROZEN);
 
-			if (prev->sched_contributes_to_load)
-				rq->nr_uninterruptible++;
-
 			/*
 			 * __schedule()			ttwu()
 			 *   prev_state = prev->state;    if (p->on_rq && ...)
@@ -6512,12 +6520,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
 			 *
 			 * After this, schedule() must not care about p->state any more.
			 */
-			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
-
-			if (prev->in_iowait) {
-				atomic_inc(&rq->nr_iowait);
-				delayacct_blkio_start();
-			}
+			block_task(rq, prev, DEQUEUE_NOCLOCK);
 		}
 		switch_count = &prev->nvcsw;
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6196f90..69ab3b0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -68,6 +68,7 @@
 #include <linux/wait_api.h>
 #include <linux/wait_bit.h>
 #include <linux/workqueue_api.h>
+#include <linux/delayacct.h>
 
 #include <trace/events/power.h>
 #include <trace/events/sched.h>
@@ -2585,6 +2586,19 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
 	sched_update_tick_dependency(rq);
 }
 
+static inline void __block_task(struct rq *rq, struct task_struct *p)
+{
+	WRITE_ONCE(p->on_rq, 0);
+	ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+	if (p->sched_contributes_to_load)
+		rq->nr_uninterruptible++;
+
+	if (p->in_iowait) {
+		atomic_inc(&rq->nr_iowait);
+		delayacct_blkio_start();
+	}
+}
+
 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
 extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
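
For readers following the split outside the kernel tree, below is a minimal
userspace C sketch of the two paths as the patch arranges them: the
migration-only deactivate_task() publishes TASK_ON_RQ_MIGRATING before the
dequeue, while block_task() handles the sleep path and only does the blocking
bookkeeping once the dequeue has actually happened. This is an illustration
under simplified assumptions, not kernel code: the structs are stand-ins, and
kernel primitives such as WRITE_ONCE(), ASSERT_EXCLUSIVE_WRITER(),
atomic_inc() and delayacct_blkio_start() are replaced by plain operations.

/* Illustrative sketch only -- not kernel code. */
#include <stdio.h>

#define TASK_ON_RQ_QUEUED	1
#define TASK_ON_RQ_MIGRATING	2

struct task {
	int on_rq;			/* 0, QUEUED or MIGRATING */
	int sched_contributes_to_load;
	int in_iowait;
};

struct rq {
	unsigned int nr_uninterruptible;
	unsigned int nr_iowait;
};

/*
 * Stand-in for dequeue_task(); it always succeeds here, but the patch
 * prepares for it being able to fail, which is why block_task() below
 * checks its return value.
 */
static int dequeue_task(struct rq *rq, struct task *p, int flags)
{
	(void)rq; (void)p; (void)flags;
	return 1;
}

/* Migration-only path: MIGRATING must be visible *before* the dequeue. */
static void deactivate_task(struct rq *rq, struct task *p, int flags)
{
	p->on_rq = TASK_ON_RQ_MIGRATING;
	dequeue_task(rq, p, flags);
}

/* Sleep-path bookkeeping, run only once the dequeue has happened. */
static void __block_task(struct rq *rq, struct task *p)
{
	p->on_rq = 0;
	if (p->sched_contributes_to_load)
		rq->nr_uninterruptible++;
	if (p->in_iowait)
		rq->nr_iowait++;	/* kernel also starts blkio delay accounting */
}

static void block_task(struct rq *rq, struct task *p, int flags)
{
	if (dequeue_task(rq, p, flags))	/* DEQUEUE_SLEEP | flags in the kernel */
		__block_task(rq, p);
}

int main(void)
{
	struct rq rq = { 0, 0 };
	struct task sleeper = { TASK_ON_RQ_QUEUED, 1, 1 };
	struct task migrator = { TASK_ON_RQ_QUEUED, 0, 0 };

	block_task(&rq, &sleeper, 0);
	deactivate_task(&rq, &migrator, 0);

	printf("sleeper: on_rq=%d nr_uninterruptible=%u nr_iowait=%u\n",
	       sleeper.on_rq, rq.nr_uninterruptible, rq.nr_iowait);
	printf("migrator: on_rq=%d (stays MIGRATING until re-enqueued)\n",
	       migrator.on_rq);
	return 0;
}

The point of the division is visible in the sketch: only block_task()
consumes the success/failure of the dequeue, so a future failing
dequeue_task() leaves the migration path untouched.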