On Tue, Apr 7, 2020 at 3:41 PM Sumit Garg <sumit.garg@xxxxxxxxxx> wrote: > > A race condition leading to a kernel crash is observed during invocation > of ieee80211_register_hw() on a dragonboard410c device having wcn36xx > driver built as a loadable module along with a wifi manager in user-space > waiting for a wifi device (wlanX) to be active. > > Sequence diagram for a particular kernel crash scenario: > > user-space ieee80211_register_hw() ieee80211_tasklet_handler() > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > | | | > |<---phy0----wiphy_register() | > |-----iwd if_add---->| | just a nitpick, a better one would be (iwd: if_add + ap_start) since we need to have 'iwctl ap start' to trigger the interrupts. > | |<---IRQ----(RX packet) > | Kernel crash | > | due to unallocated | > | workqueue. | > | | | > | alloc_ordered_workqueue() | > | | | > | Misc wiphy init. | > | | | > | ieee80211_if_add() | > | | | > > As evident from above sequence diagram, this race condition isn't specific > to a particular wifi driver but rather the initialization sequence in > ieee80211_register_hw() needs to be fixed. So re-order the initialization > sequence and the updated sequence diagram would look like: > > user-space ieee80211_register_hw() ieee80211_tasklet_handler() > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > | | | > | alloc_ordered_workqueue() | > | | | > | Misc wiphy init. | > | | | > |<---phy0----wiphy_register() | > |-----iwd if_add---->| | same as above. > | |<---IRQ----(RX packet) > | | | > | ieee80211_if_add() | > | | | > > Cc: <stable@xxxxxxxxxxxxxxx> > Signed-off-by: Sumit Garg <sumit.garg@xxxxxxxxxx> > --- > > Changes in v2: > - Move rtnl_unlock() just after ieee80211_init_rate_ctrl_alg(). > - Update sequence diagrams in commit message for more clarification. > > net/mac80211/main.c | 22 +++++++++++++--------- > 1 file changed, 13 insertions(+), 9 deletions(-) > > diff --git a/net/mac80211/main.c b/net/mac80211/main.c > index 4c2b5ba..d497129 100644 > --- a/net/mac80211/main.c > +++ b/net/mac80211/main.c > @@ -1051,7 +1051,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; > if (hw->max_signal <= 0) { > result = -EINVAL; > - goto fail_wiphy_register; > + goto fail_workqueue; > } > } > > @@ -1113,7 +1113,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > > result = ieee80211_init_cipher_suites(local); > if (result < 0) > - goto fail_wiphy_register; > + goto fail_workqueue; > > if (!local->ops->remain_on_channel) > local->hw.wiphy->max_remain_on_channel_duration = 5000; > @@ -1139,10 +1139,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > > local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM; > > - result = wiphy_register(local->hw.wiphy); > - if (result < 0) > - goto fail_wiphy_register; > - > /* > * We use the number of queues for feature tests (QoS, HT) internally > * so restrict them appropriately. > @@ -1207,6 +1203,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > goto fail_rate; > } > > + rtnl_unlock(); > + > if (local->rate_ctrl) { > clear_bit(IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, hw->flags); > if (local->rate_ctrl->ops->capa & RATE_CTRL_CAPA_VHT_EXT_NSS_BW) > @@ -1254,6 +1252,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > local->sband_allocated |= BIT(band); > } > > + result = wiphy_register(local->hw.wiphy); > + if (result < 0) > + goto fail_wiphy_register; > + > + rtnl_lock(); > + > /* add one default STA interface if supported */ > if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) && > !ieee80211_hw_check(hw, NO_AUTO_VIF)) { > @@ -1293,6 +1297,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > #if defined(CONFIG_INET) || defined(CONFIG_IPV6) > fail_ifa: > #endif > + wiphy_unregister(local->hw.wiphy); > + fail_wiphy_register: > rtnl_lock(); > rate_control_deinitialize(local); > ieee80211_remove_interfaces(local); > @@ -1302,8 +1308,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) > ieee80211_led_exit(local); > destroy_workqueue(local->workqueue); > fail_workqueue: > - wiphy_unregister(local->hw.wiphy); > - fail_wiphy_register: > if (local->wiphy_ciphers_allocated) > kfree(local->hw.wiphy->cipher_suites); > kfree(local->int_scan_req); > @@ -1353,8 +1357,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) > skb_queue_purge(&local->skb_queue_unreliable); > skb_queue_purge(&local->skb_queue_tdls_chsw); > > - destroy_workqueue(local->workqueue); > wiphy_unregister(local->hw.wiphy); > + destroy_workqueue(local->workqueue); > ieee80211_led_exit(local); > kfree(local->int_scan_req); > } > -- > 2.7.4 > -- Thanks, Regards, Chaitanya T K.