4.9-stable review patch. If anyone has any objections, please let me know. ------------------ From: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> commit dc434e056fe1dada20df7ba07f32739d3a701adf upstream. The setup/remove_state/instance() functions in the hotplug core code are serialized against concurrent CPU hotplug, but unfortunately not serialized against themselves. As a consequence, a concurrent invocation of these functions results in corruption of the callback machinery because two instances try to invoke callbacks on remote CPUs at the same time. This results in missing callback invocations and initiator threads waiting forever on the completion. The obvious solution to replace get_online_cpus() with cpu_hotplug_begin() is not possible because at least one callsite calls into these functions from a get_online_cpus() locked region. Extend the protection scope of the cpuhp_state_mutex from solely protecting the state arrays to cover the callback invocation machinery as well. Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface") Reported-and-tested-by: Bart Van Assche <Bart.VanAssche@xxxxxxxxxxx> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Cc: hpa@xxxxxxxxx Cc: mingo@xxxxxxxxxx Cc: akpm@xxxxxxxxxxxxxxxxxxxx Cc: torvalds@xxxxxxxxxxxxxxxxxxxx Link: http://lkml.kernel.org/r/20170314150645.g4tdyoszlcbajmna@xxxxxxxxxxxxx Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- kernel/cpu.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1441,14 +1441,12 @@ static void cpuhp_store_callbacks(enum c /* (Un)Install the callbacks for further cpu hotplug operations */ struct cpuhp_step *sp; - mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(state); sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance 
= multi_instance; INIT_HLIST_HEAD(&sp->list); - mutex_unlock(&cpuhp_state_mutex); } static void *cpuhp_get_teardown_cb(enum cpuhp_state state) @@ -1518,16 +1516,13 @@ static int cpuhp_reserve_state(enum cpuh { enum cpuhp_state i; - mutex_lock(&cpuhp_state_mutex); for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { if (cpuhp_ap_states[i].name) continue; cpuhp_ap_states[i].name = "Reserved"; - mutex_unlock(&cpuhp_state_mutex); return i; } - mutex_unlock(&cpuhp_state_mutex); WARN(1, "No more dynamic states available for CPU hotplug\n"); return -ENOSPC; } @@ -1544,6 +1539,7 @@ int __cpuhp_state_add_instance(enum cpuh return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; @@ -1568,11 +1564,10 @@ int __cpuhp_state_add_instance(enum cpuh } add_node: ret = 0; - mutex_lock(&cpuhp_state_mutex); hlist_add_head(node, &sp->list); - mutex_unlock(&cpuhp_state_mutex); err: + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); return ret; } @@ -1601,6 +1596,7 @@ int __cpuhp_setup_state(enum cpuhp_state return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); /* currently assignments for the ONLINE state are possible */ if (state == CPUHP_AP_ONLINE_DYN) { @@ -1636,6 +1632,8 @@ int __cpuhp_setup_state(enum cpuhp_state } } out: + mutex_unlock(&cpuhp_state_mutex); + put_online_cpus(); if (!ret && dyn_state) return state; @@ -1655,6 +1653,8 @@ int __cpuhp_state_remove_instance(enum c return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); + if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* @@ -1671,7 +1671,6 @@ int __cpuhp_state_remove_instance(enum c } remove: - mutex_lock(&cpuhp_state_mutex); hlist_del(node); mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); @@ -1696,6 +1695,7 @@ void __cpuhp_remove_state(enum cpuhp_sta BUG_ON(cpuhp_cb_check(state)); get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), @@ 
-1721,6 +1721,7 @@ void __cpuhp_remove_state(enum cpuhp_sta } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); } EXPORT_SYMBOL(__cpuhp_remove_state);