cancel_work is not backported to all custom kernels. Add a workaround to
skip execution of already queued recovery jobs, if the device is already
reset.

Signed-off-by: Lijo Lazar <lijo.lazar@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  |  9 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  | 16 ++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bebc73c6822c..c66524e2a56a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5411,6 +5411,8 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
+	amdgpu_reset_domain_clear_pending(adev->reset_domain);
+
 #if defined(CONFIG_DEBUG_FS)
 	if (!amdgpu_sriov_vf(adev))
 		cancel_work(&adev->reset_work);
@@ -5452,6 +5454,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	bool audio_suspended = false;
 	bool gpu_reset_for_dev_remove = false;
 
+	if (amdgpu_reset_domain_in_drain_mode(adev->reset_domain))
+		return 0;
+
 	gpu_reset_for_dev_remove =
 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 4baa300121d8..3ece7267d6ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -120,6 +120,14 @@ void amdgpu_reset_destroy_reset_domain(struct kref *ref)
 	kvfree(reset_domain);
 }
 
+static void amdgpu_reset_domain_cancel_all_work(struct work_struct *work)
+{
+	struct amdgpu_reset_domain *reset_domain =
+		container_of(work, struct amdgpu_reset_domain, clear);
+
+	reset_domain->drain = false;
+}
+
 struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
 							     char *wq_name)
 {
@@ -142,6 +150,7 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
 
 	}
 
+	INIT_WORK(&reset_domain->clear, amdgpu_reset_domain_cancel_all_work);
 	atomic_set(&reset_domain->in_gpu_reset, 0);
 	atomic_set(&reset_domain->reset_res, 0);
 	init_rwsem(&reset_domain->sem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index b0335a1c5e90..70059eea7e2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -87,6 +87,8 @@ struct amdgpu_reset_domain {
 	struct rw_semaphore sem;
 	atomic_t in_gpu_reset;
 	atomic_t reset_res;
+	struct work_struct clear;
+	bool drain;
 };
 
 #ifdef CONFIG_DEV_COREDUMP
@@ -137,6 +139,20 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
 	return queue_work(domain->wq, work);
 }
 
+static inline void amdgpu_reset_domain_clear_pending(struct amdgpu_reset_domain *domain)
+{
+	domain->drain = true;
+	/* queue one more work to the domain queue. Till this work is finished,
+	 * domain is in drain mode.
+	 */
+	queue_work(domain->wq, &domain->clear);
+}
+
+static inline bool amdgpu_reset_domain_in_drain_mode(struct amdgpu_reset_domain *domain)
+{
+	return domain->drain;
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
-- 
2.25.1