05.01.2021 20:25, Dmitry Osipenko пишет: > 22.12.2020 04:37, Frederic Weisbecker пишет: >> Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP >> kthread (rcuog) to be serviced. >> >> Usually a wake up happening while running the idle task is spotted in >> one of the need_resched() checks carefully placed within the idle loop >> that can break to the scheduler. >> >> Unfortunately within cpuidle the call to rcu_idle_enter() is already >> beyond the last generic need_resched() check. Some drivers may perform >> their own checks like with mwait_idle_with_hints() but many others don't >> and we may halt the CPU with a resched request unhandled, leaving the >> task hanging. >> >> Fix this with performing a last minute need_resched() check after >> calling rcu_idle_enter(). >> >> Reported-by: Paul E. McKenney <paulmck@xxxxxxxxxx> >> Fixes: 1098582a0f6c (sched,idle,rcu: Push rcu_idle deeper into the idle path) >> Cc: stable@xxxxxxxxxxxxxxx >> Cc: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx> >> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> >> Cc: Rafael J. 
Wysocki <rafael.j.wysocki@xxxxxxxxx> >> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> >> Cc: Ingo Molnar <mingo@xxxxxxxxxx> >> Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx> >> --- >> drivers/cpuidle/cpuidle.c | 33 +++++++++++++++++++++++++-------- >> 1 file changed, 25 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c >> index ef2ea1b12cd8..4cc1ba49ce05 100644 >> --- a/drivers/cpuidle/cpuidle.c >> +++ b/drivers/cpuidle/cpuidle.c >> @@ -134,8 +134,8 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv, >> } >> >> #ifdef CONFIG_SUSPEND >> -static void enter_s2idle_proper(struct cpuidle_driver *drv, >> - struct cpuidle_device *dev, int index) >> +static int enter_s2idle_proper(struct cpuidle_driver *drv, >> + struct cpuidle_device *dev, int index) >> { >> ktime_t time_start, time_end; >> struct cpuidle_state *target_state = &drv->states[index]; >> @@ -151,7 +151,14 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, >> stop_critical_timings(); >> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) >> rcu_idle_enter(); >> - target_state->enter_s2idle(dev, drv, index); >> + /* >> + * Last need_resched() check must come after rcu_idle_enter() >> + * which may wake up RCU internal tasks. 
>> + */ >> + if (!need_resched()) >> + target_state->enter_s2idle(dev, drv, index); >> + else >> + index = -EBUSY; >> if (WARN_ON_ONCE(!irqs_disabled())) >> local_irq_disable(); >> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) >> @@ -159,10 +166,13 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, >> tick_unfreeze(); >> start_critical_timings(); >> >> - time_end = ns_to_ktime(local_clock()); >> + if (index > 0) { > > index=0 is valid too > >> + time_end = ns_to_ktime(local_clock()); >> + dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start); >> + dev->states_usage[index].s2idle_usage++; >> + } >> >> - dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start); >> - dev->states_usage[index].s2idle_usage++; >> + return index; >> } >> >> /** >> @@ -184,7 +194,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) >> */ >> index = find_deepest_state(drv, dev, U64_MAX, 0, true); >> if (index > 0) { >> - enter_s2idle_proper(drv, dev, index); >> + index = enter_s2idle_proper(drv, dev, index); >> local_irq_enable(); >> } >> return index; >> @@ -234,7 +244,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, >> stop_critical_timings(); >> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) >> rcu_idle_enter(); >> - entered_state = target_state->enter(dev, drv, index); >> + /* >> + * Last need_resched() check must come after rcu_idle_enter() >> + * which may wake up RCU internal tasks. >> + */ >> + if (!need_resched()) >> + entered_state = target_state->enter(dev, drv, index); >> + else >> + entered_state = -EBUSY; >> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) >> rcu_idle_exit(); >> start_critical_timings(); >> > > This patch causes a hard lockup on NVIDIA Tegra using today's linux-next. > Disabling the coupled idle state helps. Please fix; thanks in advance. 
> This isn't a proper fix, but it works: diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c index 191966dc8d02..ecc5d9b31553 100644 --- a/drivers/cpuidle/cpuidle-tegra.c +++ b/drivers/cpuidle/cpuidle-tegra.c @@ -148,7 +148,7 @@ static int tegra_cpuidle_c7_enter(void) static int tegra_cpuidle_coupled_barrier(struct cpuidle_device *dev) { - if (tegra_pending_sgi()) { + if (tegra_pending_sgi() || need_resched()) { /* * CPU got local interrupt that will be lost after GIC's * shutdown because GIC driver doesn't save/restore the diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 4cc1ba49ce05..2bc52ccc339b 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -248,7 +248,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * Last need_resched() check must come after rcu_idle_enter() * which may wake up RCU internal tasks. */ - if (!need_resched()) + if ((target_state->flags & CPUIDLE_FLAG_COUPLED) || !need_resched()) entered_state = target_state->enter(dev, drv, index); else entered_state = -EBUSY;