Backport of upstream commit dc434e056fe1dada20df7ba07f32739d3a701adf The setup/remove_state/instance() functions in the hotplug core code are serialized against concurrent CPU hotplug, but unfortunately not serialized against themselves. As a consequence a concurrent invocation of these functions results in corruption of the callback machinery because two instances try to invoke callbacks on remote cpus at the same time. This results in missing callback invocations and initiator threads waiting forever on the completion. The obvious solution to replace get_online_cpus() with cpu_hotplug_begin() is not possible because at least one callsite calls into these functions from a get_online_cpus() locked region. Extend the protection scope of the cpuhp_state_mutex from solely protecting the state arrays to cover the callback invocation machinery as well. Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface") Reported-and-tested-by: Bart Van Assche <Bart.VanAssche@xxxxxxxxxxx> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Cc: hpa@xxxxxxxxx Cc: mingo@xxxxxxxxxx Cc: akpm@xxxxxxxxxxxxxxxxxxxx Cc: torvalds@xxxxxxxxxxxxxxxxxxxx Link: http://lkml.kernel.org/r/20170314150645.g4tdyoszlcbajmna@xxxxxxxxxxxxx Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> --- This is backported for v4.9. 
kernel/cpu.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 217fd2e7f435..99c6c568bc55 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1441,14 +1441,12 @@ static void cpuhp_store_callbacks(enum cpuhp_state state, /* (Un)Install the callbacks for further cpu hotplug operations */ struct cpuhp_step *sp; - mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(state); sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); - mutex_unlock(&cpuhp_state_mutex); } static void *cpuhp_get_teardown_cb(enum cpuhp_state state) @@ -1518,16 +1516,13 @@ static int cpuhp_reserve_state(enum cpuhp_state state) { enum cpuhp_state i; - mutex_lock(&cpuhp_state_mutex); for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { if (cpuhp_ap_states[i].name) continue; cpuhp_ap_states[i].name = "Reserved"; - mutex_unlock(&cpuhp_state_mutex); return i; } - mutex_unlock(&cpuhp_state_mutex); WARN(1, "No more dynamic states available for CPU hotplug\n"); return -ENOSPC; } @@ -1544,6 +1539,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; @@ -1568,11 +1564,10 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, } add_node: ret = 0; - mutex_lock(&cpuhp_state_mutex); hlist_add_head(node, &sp->list); - mutex_unlock(&cpuhp_state_mutex); err: + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); return ret; } @@ -1601,6 +1596,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); /* currently assignments for the ONLINE state are possible */ if (state == CPUHP_AP_ONLINE_DYN) { @@ -1636,6 +1632,8 @@ int __cpuhp_setup_state(enum cpuhp_state state, } } out: + mutex_unlock(&cpuhp_state_mutex); + 
put_online_cpus(); if (!ret && dyn_state) return state; @@ -1655,6 +1653,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); + if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* @@ -1671,7 +1671,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, } remove: - mutex_lock(&cpuhp_state_mutex); hlist_del(node); mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); @@ -1696,6 +1695,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) BUG_ON(cpuhp_cb_check(state)); get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), @@ -1721,6 +1721,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); } EXPORT_SYMBOL(__cpuhp_remove_state); -- 2.11.0