As the race of gpu reset with ras interrupts. we might lose a chance to do gpu recovery. To guarantee the gpu has recovered successfully, we use atomic to save the counts of gpu reset requests, and issue another gpu reset if there are any pending requests. Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 +++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a96b0f17c619..c1f444b74b19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1220,7 +1220,15 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) container_of(work, struct amdgpu_ras, recovery_work); amdgpu_device_gpu_recover(ras->adev, 0); - atomic_set(&ras->in_recovery, 0); + /* if there is no competiton, in_recovery changes from 1 to 0. + * if ras_reset_gpu is called while we are doing gpu recvoery, + * bacause of the atomic protection, we may lose some recovery + * requests. + * So we use atomic_xchg to check the count of requests, and + * issue another gpu reset request to perform the gpu recovery. + */ + if (atomic_xchg(&ras->in_recovery, 0) > 1) + amdgpu_ras_reset_gpu(ras->adev, 0); } static int amdgpu_ras_release_vram(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 2765f2dbb1e6..ba423a4a3013 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -498,7 +498,7 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) + if (atomic_inc_return(&ras->in_recovery) == 1) schedule_work(&ras->recovery_work); return 0; } -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx