[AMD Official Use Only - General]

Reviewed-by: Asad Kamal <asad.kamal@xxxxxxx>

Thanks & Regards
Asad

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Lazar, Lijo
Sent: Friday, November 10, 2023 4:19 PM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>
Subject: Re: [PATCH] drm/amdgpu: Skip execution of pending reset jobs

<Ping>

On 11/9/2023 1:08 PM, Lijo Lazar wrote:
> cancel_work is not backported to all custom kernels. Add a workaround
> to skip execution of already queued recovery jobs, if the device is
> already reset.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 9 +++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 16 ++++++++++++++++
> 3 files changed, 30 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bebc73c6822c..c66524e2a56a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5411,6 +5411,8 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
> {
> struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>
> + amdgpu_reset_domain_clear_pending(adev->reset_domain);
> +
> #if defined(CONFIG_DEBUG_FS)
> if (!amdgpu_sriov_vf(adev))
> cancel_work(&adev->reset_work);
> @@ -5452,6 +5454,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> bool audio_suspended = false;
> bool gpu_reset_for_dev_remove = false;
>
> + if (amdgpu_reset_domain_in_drain_mode(adev->reset_domain))
> + return 0;
> +
> gpu_reset_for_dev_remove =
> test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
> test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); diff
> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> 
index 4baa300121d8..3ece7267d6ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> @@ -120,6 +120,14 @@ void amdgpu_reset_destroy_reset_domain(struct kref *ref)
> kvfree(reset_domain);
> }
>
> +static void amdgpu_reset_domain_cancel_all_work(struct work_struct
> +*work) {
> + struct amdgpu_reset_domain *reset_domain =
> + container_of(work, struct amdgpu_reset_domain, clear);
> +
> + reset_domain->drain = false;
> +}
> +
> struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
> char *wq_name)
> {
> @@ -142,6 +150,7 @@ struct amdgpu_reset_domain
> *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
>
> }
>
> + INIT_WORK(&reset_domain->clear,
> +amdgpu_reset_domain_cancel_all_work);
> atomic_set(&reset_domain->in_gpu_reset, 0);
> atomic_set(&reset_domain->reset_res, 0);
> init_rwsem(&reset_domain->sem);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> index b0335a1c5e90..70059eea7e2f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> @@ -87,6 +87,8 @@ struct amdgpu_reset_domain {
> struct rw_semaphore sem;
> atomic_t in_gpu_reset;
> atomic_t reset_res;
> + struct work_struct clear;
> + bool drain;
> };
>
> #ifdef CONFIG_DEV_COREDUMP
> @@ -137,6 +139,20 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
> return queue_work(domain->wq, work);
> }
>
> +static inline void amdgpu_reset_domain_clear_pending(struct
> +amdgpu_reset_domain *domain) {
> + domain->drain = true;
> + /* queue one more work to the domain queue. Till this work is finished,
> + * domain is in drain mode.
> + */
> + queue_work(domain->wq, &domain->clear); }
> +
> +static inline bool amdgpu_reset_domain_in_drain_mode(struct
> +amdgpu_reset_domain *domain) {
> + return domain->drain;
> +}
> +
> void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain
> *reset_domain);
>
> void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain
> *reset_domain);