issue: on bare-metal when doing kmd reload test, there are chance that kernel hit fatal error afer driver unloaded/reloaded fix: the cause is that those "idle work" not really stopped and if kmd was is unloaded too quick that were chance that "idle work" run after driver structures already released which introduces this issue. Change-Id: Idb0f7db771e7ca60dba925d1d0f48b1de08dc89e Signed-off-by: Monk Liu <Monk.Liu at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 54145ec..69fb5e50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1419,7 +1419,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) } } - mod_delayed_work(system_wq, &adev->late_init_work, + if (!amdgpu_sriov_vf(adev)) + mod_delayed_work(system_wq, &adev->late_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); amdgpu_device_fill_reset_magic(adev); @@ -2087,7 +2088,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; } adev->accel_working = false; - cancel_delayed_work_sync(&adev->late_init_work); + + if (!amdgpu_sriov_vf(adev)) + while (cancel_delayed_work_sync(&adev->late_init_work)) + schedule(); /* to make sure late_init_work really stopped */ + /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index caba610..337db57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -299,7 +299,8 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) return 0; if (!amdgpu_sriov_vf(adev)) - cancel_delayed_work_sync(&adev->uvd.idle_work); + while (cancel_delayed_work_sync(&adev->uvd.idle_work)) + schedule(); /* to make sure idle work really stopped */ for (i = 0; i < adev->uvd.max_handles; ++i) if (atomic_read(&adev->uvd.handles[i])) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a829350..2874fda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -243,7 +243,9 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) return 0; if (!amdgpu_sriov_vf(adev)) - cancel_delayed_work_sync(&adev->vce.idle_work); + while (cancel_delayed_work_sync(&adev->vce.idle_work)) + schedule(); /* to make sure the idle_work really stopped */ + /* TODO: suspending running encoding sessions isn't supported */ return -EINVAL; } -- 2.7.4