In function flr_work, we should do gpu recovery when no job is running. Fix the logic by inverting it. v2: modify the description Signed-off-by: Liu ChengZhe <ChengZhe.Liu@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 3 ++- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 9c07014d9bd6..f5ce9a9f4cf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -262,7 +262,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) - && (amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) + && (!amdgpu_device_has_job_running(adev) || + adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) amdgpu_device_gpu_recover(adev, NULL); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 9c23abf9b140..666ed99cc14b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -283,7 +283,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) - && (amdgpu_device_has_job_running(adev) || + && (!amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT || adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || -- 2.25.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx