From: Ben Greear <greearb@xxxxxxxxxxxxxxx>

On my x86-64 system with 6 mtk7915 radios, one or more radios often fail to start upon reboot. So, do up to 3 retries in the PCI probe logic.

Example of this mitigation working:

[ 8.637908] mt7915e 0000:06:00.0: ASIC revision: 79150000
[ 8.638026] mt7915e 0000:06:00.0: mt7915: register_device Driver-Version: 5.17.0-ct
[ 28.863203] mt7915e 0000:06:00.0: MCU: Initial Failure: Message 00000010 (cid 10 ext_cid: 0 seq 1) timeout. Last successful cmd: 0x0
[ 28.873960] mt7915e 0000:06:00.0: Failed to get patch semaphore: -110
[ 28.879141] mt7915e 0000:06:00.0: mcu-init: Failed to load firmware, err: -11
[ 28.879143] mt7915e 0000:06:00.0: mt7915_register_device failed, ret: -11
[ 28.884796] mt7915e 0000:06:00.0: mt7915_pci_probe had error on try 1/3, ret: -11
[ 28.891161] mt7915e 0000:06:00.0: ASIC revision: 79150094
[ 28.891261] mt7915e 0000:06:00.0: mt7915: register_device Driver-Version: 5.17.0-ct
[ 29.022875] mt7915e 0000:06:00.0: HW/SW Version: 0x8a108a10, Build Time: 20211222184017a
[ 29.033934] mt7915e 0000:06:00.0: WM Firmware Version: ____000000, Build Time: 20211222184052
[ 29.051992] mt7915e 0000:06:00.0: WA Firmware Version: DEV_000000, Build Time: 20211222184111
[ 29.446014] mt7915e 0000:06:00.0: mt7915_pci_probe succeeded on try 2/3

A quick test of traffic shows the radio works fine after recovery.

Signed-off-by: Ben Greear <greearb@xxxxxxxxxxxxxxx> --- .../net/wireless/mediatek/mt76/mt7915/pci.c | 61 +++++++++++++------ 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c index 6f819c41a4c4..07cb8a699b75 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c @@ -92,8 +92,8 @@ static int mt7915_pci_hif2_probe(struct pci_dev *pdev) return 0; } -static int mt7915_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int _mt7915_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) { struct mt7915_dev *dev; struct mt76_dev *mdev; @@ -101,20 +101,6 @@ static int mt7915_pci_probe(struct pci_dev *pdev, int irq; int ret; - ret = pcim_enable_device(pdev); - if (ret) - return ret; - - ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); - if (ret) - return ret; - - pci_set_master(pdev); - - ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (ret) - return ret; - mt76_pci_disable_aspm(pdev); if (id->device == 0x7916 || id->device == 0x790a) @@ -161,8 +147,11 @@ static int mt7915_pci_probe(struct pci_dev *pdev, } ret = mt7915_register_device(dev); - if (ret) + if (ret) { + dev_err(dev->mt76.dev, "mt7915_register_device failed, ret: %d", + ret); goto free_hif2_irq; + } return 0; @@ -181,6 +170,44 @@ static int mt7915_pci_probe(struct pci_dev *pdev, return ret; } +static int mt7915_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int z; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (ret) + return ret; + + pci_set_master(pdev); + + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + for (z = 0; z<3; z++) { + ret = _mt7915_pci_probe(pdev, id); + if (ret) { + dev_err(&pdev->dev, "mt7915_pci_probe had error on try %d/3, ret: %d", + z + 
1, ret); + } else { + /* It is worth a message to let user know we succeeded if + * earlier attempts failed. + */ + if (z > 0) + dev_info(&pdev->dev, "mt7915_pci_probe succeeded on try %d/3", + z + 1); + break; + } + } + return ret; +} + static void mt7915_hif_remove(struct pci_dev *pdev) { struct mt7915_hif *hif = pci_get_drvdata(pdev); -- 2.20.1