start_nocpsch would never set dqm->sched_running on Hawaii due to an early return statement. This would trigger asserts in other functions and end up in inconsistent states. Bug: https://github.com/RadeonOpenCompute/ROCm/issues/1624 Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index dd0b952f0173..104b70e61ba0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1004,14 +1004,17 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { + int r = 0; + pr_info("SW scheduler is used"); init_interrupts(dqm); if (dqm->dev->adev->asic_type == CHIP_HAWAII) - return pm_init(&dqm->packet_mgr, dqm); - dqm->sched_running = true; + r = pm_init(&dqm->packet_mgr, dqm); + if (!r) + dqm->sched_running = true; - return 0; + return r; } static int stop_nocpsch(struct device_queue_manager *dqm) -- 2.32.0