The following commit has been merged into the sched/core branch of tip: Commit-ID: 5e7f238920174248049ff840eff43c94f3a2e67e Gitweb: https://git.kernel.org/tip/5e7f238920174248049ff840eff43c94f3a2e67e Author: Vincent Donnefort <vincent.donnefort@xxxxxxx> AuthorDate: Tue, 16 Feb 2021 10:35:05 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Wed, 03 Mar 2021 10:33:00 +01:00 cpu/hotplug: CPUHP_BRINGUP_CPU failure exception The atomic states (between CPUHP_AP_IDLE_DEAD and CPUHP_AP_ONLINE) are triggered by the CPUHP_BRINGUP_CPU step. If the latter fails, no atomic state can be rolled back. DEAD callbacks too can't fail and disallow recovery. As a consequence, during hotunplug, the fail injection interface should prohibit all states from CPUHP_BRINGUP_CPU to CPUHP_ONLINE. Signed-off-by: Vincent Donnefort <vincent.donnefort@xxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Link: https://lkml.kernel.org/r/20210216103506.416286-3-vincent.donnefort@xxxxxxx --- kernel/cpu.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 9121edf..680ed8f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1045,9 +1045,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, * to do the further cleanups. */ ret = cpuhp_down_callbacks(cpu, st, target); - if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) { - cpuhp_reset_state(st, prev_state); - __cpuhp_kick_ap(st); + if (ret && st->state < prev_state) { + if (st->state == CPUHP_TEARDOWN_CPU) { + cpuhp_reset_state(st, prev_state); + __cpuhp_kick_ap(st); + } else { + WARN(1, "DEAD callback error for CPU%d", cpu); + } } out: @@ -2222,6 +2226,15 @@ static ssize_t write_cpuhp_fail(struct device *dev, return -EINVAL; /* + * DEAD callbacks cannot fail... + * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter + * triggering STARTING callbacks, a failure in this state would + * hinder rollback. + */ + if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU) + return -EINVAL; + + /* * Cannot fail anything that doesn't have callbacks. */ mutex_lock(&cpuhp_state_mutex);