When compute fence did not signal, compute ring cannot detect hardware hang because its timeout value is set to be infinite by default. In SR-IOV and passthrough mode, if user does not declare custome timeout value for compute ring, then use gfx ring timeout value as default. So that when there is a ture hardware hang, compute ring can detect it. Change-Id: I794ec0868c6c0aad407749457260ecfee0617c10 Signed-off-by: Jesse Zhang <zhexi.zhang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 13 +------------ 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3b5282b..03ac5a1da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1024,12 +1024,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_device_check_block_size(adev); - ret = amdgpu_device_get_job_timeout_settings(adev); - if (ret) { - dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return ret; - } - adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); return ret; @@ -2732,6 +2726,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + r = amdgpu_device_get_job_timeout_settings(adev); + if (r) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return r; + } + /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 420888e..1236245 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1378,10 +1378,12 @@ int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) } /* * There is only one value specified and - * it should apply to all non-compute jobs. + * it should apply to all jobs. */ if (index == 1) adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) + adev->compute_timeout = adev->gfx_timeout; } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index cbcaa7c..9ef53ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -460,18 +460,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, timeout = adev->gfx_timeout; break; case AMDGPU_RING_TYPE_COMPUTE: - /* - * For non-sriov case, no timeout enforce - * on compute ring by default. Unless user - * specifies a timeout for compute ring. - * - * For sriov case, always use the timeout - * as gfx ring - */ - if (!amdgpu_sriov_vf(ring->adev)) - timeout = adev->compute_timeout; - else - timeout = adev->gfx_timeout; + timeout = adev->compute_timeout; break; case AMDGPU_RING_TYPE_SDMA: timeout = adev->sdma_timeout; -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx