Ping.
On 2021-12-08 3:25 a.m., Felix Kuehling wrote:
start_nocpsch would never set dqm->sched_running on Hawaii due to an
early return statement. This would trigger asserts in other functions
and end up in inconsistent states.
Bug: https://github.com/RadeonOpenCompute/ROCm/issues/1624
Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index dd0b952f0173..104b70e61ba0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1004,14 +1004,17 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
+ int r = 0;
+
pr_info("SW scheduler is used");
init_interrupts(dqm);
if (dqm->dev->adev->asic_type == CHIP_HAWAII)
- return pm_init(&dqm->packet_mgr, dqm);
- dqm->sched_running = true;
+ r = pm_init(&dqm->packet_mgr, dqm);
+ if (!r)
+ dqm->sched_running = true;
- return 0;
+ return r;
}
static int stop_nocpsch(struct device_queue_manager *dqm)