Thus, newidle_balance() is entered with interrupts enabled, which allows
(in the next patch) enabling interrupts when the lock is dropped.

Signed-off-by: Scott Wood <swood@xxxxxxxxxx>
---
 kernel/sched/core.c  |  7 ++++---
 kernel/sched/fair.c  | 45 ++++++++++++++++-----------------------------
 kernel/sched/sched.h |  6 ++----
 3 files changed, 22 insertions(+), 36 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9a2fbf98fd6f..0294beb8d16c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3241,6 +3241,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	}
 
 	tick_nohz_task_switch();
+
+	if (is_idle_task(current))
+		newidle_balance();
+
 	return rq;
 }
 
@@ -3919,8 +3923,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 
 		p = pick_next_task_fair(rq, prev, rf);
-		if (unlikely(p == RETRY_TASK))
-			goto restart;
 
 		/* Assumes fair_sched_class->next == idle_sched_class */
 		if (!p) {
@@ -3931,7 +3933,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		return p;
 	}
 
-restart:
 #ifdef CONFIG_SMP
 	/*
 	 * We must do the balancing pass before put_next_task(), such
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02f323b85b6d..74c3c5280d6b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6758,8 +6758,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	if (rq->nr_running)
 		return 1;
-
-	return newidle_balance(rq, rf) != 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -6934,9 +6932,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
 	struct task_struct *p;
-	int new_tasks;
 
-again:
 	if (!sched_fair_runnable(rq))
 		goto idle;
 
@@ -7050,19 +7046,6 @@ done: __maybe_unused;
 	if (!rf)
 		return NULL;
 
-	new_tasks = newidle_balance(rq, rf);
-
-	/*
-	 * Because newidle_balance() releases (and re-acquires) rq->lock, it is
-	 * possible for any higher priority task to appear. In that case we
-	 * must re-start the pick_next_entity() loop.
-	 */
-	if (new_tasks < 0)
-		return RETRY_TASK;
-
-	if (new_tasks > 0)
-		goto again;
-
 	/*
 	 * rq is about to be idle, check if we need to update the
 	 * lost_idle_time of clock_pelt
@@ -10425,14 +10408,23 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
  *   0 - failed, no new tasks
  * > 0 - success, new (fair) tasks present
  */
-int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+int newidle_balance(void)
 {
 	unsigned long next_balance = jiffies + HZ;
-	int this_cpu = this_rq->cpu;
+	int this_cpu;
 	struct sched_domain *sd;
+	struct rq *this_rq;
 	int pulled_task = 0;
 	u64 curr_cost = 0;
 
+	preempt_disable();
+	this_rq = this_rq();
+	this_cpu = this_rq->cpu;
+	local_bh_disable();
+	raw_spin_lock_irq(&this_rq->lock);
+
+	update_rq_clock(this_rq);
+	update_misfit_status(NULL, this_rq);
 	/*
 	 * We must set idle_stamp _before_ calling idle_balance(), such that we
@@ -10444,15 +10436,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 	 * Do not pull tasks towards !active CPUs...
 	 */
 	if (!cpu_active(this_cpu))
-		return 0;
-
-	/*
-	 * This is OK, because current is on_cpu, which avoids it being picked
-	 * for load-balance and preemption/IRQs are still disabled avoiding
-	 * further scheduler activity on it and we're being very careful to
-	 * re-start the picking loop.
-	 */
-	rq_unpin_lock(this_rq, rf);
+		goto out_unlock;
 
 	if (this_rq->avg_idle < sysctl_sched_migration_cost ||
 	    !READ_ONCE(this_rq->rd->overload)) {
@@ -10534,7 +10518,10 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 	if (pulled_task)
 		this_rq->idle_stamp = 0;
 
-	rq_repin_lock(this_rq, rf);
+out_unlock:
+	raw_spin_unlock_irq(&this_rq->lock);
+	local_bh_enable();
+	preempt_enable();
 
 	return pulled_task;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index db3a57675ccf..3d97c51544d7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1504,13 +1504,13 @@ static inline void unregister_sched_domain_sysctl(void)
 }
 #endif
 
-extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+extern int newidle_balance(void);
 
 #else
 
 static inline void sched_ttwu_pending(void) { }
 
-static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+static inline int newidle_balance(void) { return 0; }
 
 #endif /* CONFIG_SMP */
 
@@ -1742,8 +1742,6 @@ extern const u32 sched_prio_to_wmult[40];
 #define ENQUEUE_MIGRATED	0x00
 #endif
 
-#define RETRY_TASK	((void *)-1UL)
-
 struct sched_class {
 	const struct sched_class *next;
 
-- 
2.18.2
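For readers following the series, here is a rough, illustrative sketch (not
the actual follow-up patch) of what "enabling interrupts when the lock is
dropped" could look like once newidle_balance() owns its own locking as
above. The helper name is hypothetical; only the raw_spin_lock_irq()/
raw_spin_unlock_irq() pattern is taken from this patch:

/*
 * Hypothetical illustration only -- not the real next patch in the series.
 * After this patch, newidle_balance() itself takes rq->lock with
 * raw_spin_lock_irq(), so when the lock is dropped around the actual
 * load-balancing work, interrupts can be re-enabled for that window.
 */
static void newidle_drop_lock_sketch(struct rq *this_rq)
{
	/* Drop rq->lock and re-enable interrupts while balancing. */
	raw_spin_unlock_irq(&this_rq->lock);

	/* ... walk the sched domains and try to pull a task here ... */

	/* Re-disable interrupts and retake rq->lock before returning. */
	raw_spin_lock_irq(&this_rq->lock);
}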