This is a note to let you know that I've just added the patch titled pds_core: Prevent health thread from running during reset/remove to the 6.6-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: pds_core-prevent-health-thread-from-running-during-r.patch and it can be found in the queue-6.6 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit 6c0a83490d8019ba7eb1ba172de81b1a151f80e3 Author: Brett Creeley <brett.creeley@xxxxxxx> Date: Mon Jan 29 15:40:30 2024 -0800 pds_core: Prevent health thread from running during reset/remove [ Upstream commit d9407ff11809c6812bb84fe7be9c1367d758e5c8 ] The PCIe reset handlers can run at the same time as the health thread. This can cause the health thread to stomp on the PCIe reset. Fix this by preventing the health thread from running while a PCIe reset is happening. As part of this use timer_shutdown_sync() during reset and remove to make sure the timer doesn't ever get rearmed. Fixes: ffa55858330f ("pds_core: implement pci reset handlers") Signed-off-by: Brett Creeley <brett.creeley@xxxxxxx> Reviewed-by: Shannon Nelson <shannon.nelson@xxxxxxx> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@xxxxxxxxx> Link: https://lore.kernel.org/r/20240129234035.69802-2-brett.creeley@xxxxxxx Signed-off-by: Jakub Kicinski <kuba@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 7cc5a6b94939..6d589ac532a3 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -298,7 +298,7 @@ static int pdsc_init_pf(struct pdsc *pdsc) err_out_teardown: pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING); err_out_unmap_bars: - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); mutex_destroy(&pdsc->config_lock); @@ -425,7 +425,7 @@ static void pdsc_remove(struct pci_dev *pdev) */ pdsc_sriov_configure(pdev, 0); - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); @@ -449,10 +449,24 @@ static void pdsc_remove(struct pci_dev *pdev) devlink_free(dl); } +static void pdsc_stop_health_thread(struct pdsc *pdsc) +{ + timer_shutdown_sync(&pdsc->wdtimer); + if (pdsc->health_work.func) + cancel_work_sync(&pdsc->health_work); +} + +static void pdsc_restart_health_thread(struct pdsc *pdsc) +{ + timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); + mod_timer(&pdsc->wdtimer, jiffies + 1); +} + static void pdsc_reset_prepare(struct pci_dev *pdev) { struct pdsc *pdsc = pci_get_drvdata(pdev); + pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); pdsc_unmap_bars(pdsc); @@ -489,6 +503,7 @@ static void pdsc_reset_done(struct pci_dev *pdev) } pdsc_fw_up(pdsc); + pdsc_restart_health_thread(pdsc); } static const struct pci_error_handlers pdsc_err_handler = {