[AMD Official Use Only - Internal Distribution Only] Series Tested-by: Emily Deng <Emily.Deng@xxxxxxx> on an SR-IOV environment with Vega10, covering the TDR-1, TDR-2 and TDR-3 test cases. Best wishes Emily Deng >-----Original Message----- >From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Felix >Kuehling >Sent: Friday, December 20, 2019 4:30 PM >To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx >Subject: [PATCH 4/4] drm/amdkfd: Avoid hanging hardware in stop_cpsch > >Don't use the HWS if it's known to be hanging. In a reset also don't try to >destroy the HIQ because that may hang on SRIOV if the KIQ is unresponsive. > >Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> >--- > .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 12 ++++++++---- > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 8 ++++---- > drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 4 ++-- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 ++-- > .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- > 5 files changed, 17 insertions(+), 13 deletions(-) > >diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >index a7e9ec1b3ce3..d7eb6ac37f62 100644 >--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >@@ -946,7 +946,7 @@ static int start_nocpsch(struct device_queue_manager >*dqm) static int stop_nocpsch(struct device_queue_manager *dqm) { > if (dqm->dev->device_info->asic_family == CHIP_HAWAII) >- pm_uninit(&dqm->packets); >+ pm_uninit(&dqm->packets, false); > dqm->sched_running = false; > > return 0; >@@ -1114,20 +1114,24 @@ static int start_cpsch(struct >device_queue_manager *dqm) > return 0; > fail_allocate_vidmem: > fail_set_sched_resources: >- pm_uninit(&dqm->packets); >+ pm_uninit(&dqm->packets, false); > fail_packet_manager_init: > return retval; > } > > static int stop_cpsch(struct device_queue_manager *dqm) { >+ bool hanging; >+ > dqm_lock(dqm); >- 
unmap_queues_cpsch(dqm, >KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); >+ if (!dqm->is_hws_hang) >+ unmap_queues_cpsch(dqm, >KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); >+ hanging = dqm->is_hws_hang || dqm->is_resetting; > dqm->sched_running = false; > dqm_unlock(dqm); > > kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); >- pm_uninit(&dqm->packets); >+ pm_uninit(&dqm->packets, hanging); > > return 0; > } >diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >index 2d56dc534459..bae706462f96 100644 >--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >@@ -195,9 +195,9 @@ static bool kq_initialize(struct kernel_queue *kq, struct >kfd_dev *dev, } > > /* Uninitialize a kernel queue and free all its memory usages. */ -static void >kq_uninitialize(struct kernel_queue *kq) >+static void kq_uninitialize(struct kernel_queue *kq, bool hanging) > { >- if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) >+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging) > kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, > kq->queue->mqd, > > KFD_PREEMPT_TYPE_WAVEFRONT_RESET, >@@ -337,9 +337,9 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev >*dev, > return NULL; > } > >-void kernel_queue_uninit(struct kernel_queue *kq) >+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging) > { >- kq_uninitialize(kq); >+ kq_uninitialize(kq, hanging); > kfree(kq); > } > >diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c >b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c >index 6cabed06ef5d..dc406e6dee23 100644 >--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c >+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c >@@ -264,10 +264,10 @@ int pm_init(struct packet_manager *pm, struct >device_queue_manager *dqm) > return 0; > } > >-void pm_uninit(struct packet_manager *pm) >+void pm_uninit(struct packet_manager *pm, bool hanging) > { > mutex_destroy(&pm->lock); >- 
kernel_queue_uninit(pm->priv_queue); >+ kernel_queue_uninit(pm->priv_queue, hanging); > } > > int pm_send_set_resources(struct packet_manager *pm, diff --git >a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >index 087e96838997..8ac680dc90f1 100644 >--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >@@ -883,7 +883,7 @@ struct device_queue_manager >*device_queue_manager_init(struct kfd_dev *dev); void >device_queue_manager_uninit(struct device_queue_manager *dqm); struct >kernel_queue *kernel_queue_init(struct kfd_dev *dev, > enum kfd_queue_type type); >-void kernel_queue_uninit(struct kernel_queue *kq); >+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging); > int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int >pasid); > > /* Process Queue Manager */ >@@ -974,7 +974,7 @@ extern const struct packet_manager_funcs >kfd_vi_pm_funcs; extern const struct packet_manager_funcs kfd_v9_pm_funcs; > > int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); >-void pm_uninit(struct packet_manager *pm); >+void pm_uninit(struct packet_manager *pm, bool hanging); > int pm_send_set_resources(struct packet_manager *pm, > struct scheduling_resources *res); > int pm_send_runlist(struct packet_manager *pm, struct list_head >*dqm_queues); diff --git >a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >index d3eacf72e8db..8fa856e6a03f 100644 >--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c >@@ -374,7 +374,7 @@ int pqm_destroy_queue(struct >process_queue_manager *pqm, unsigned int qid) > /* destroy kernel queue (DIQ) */ > dqm = pqn->kq->dev->dqm; > dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); >- kernel_queue_uninit(pqn->kq); >+ kernel_queue_uninit(pqn->kq, false); > } > > if (pqn->q) { >-- >2.24.1 > 
>_______________________________________________ >amd-gfx mailing list >amd-gfx@xxxxxxxxxxxxxxxxxxxxx >https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.fre >edesktop.org%2Fmailman%2Flistinfo%2Famd- >gfx&data=02%7C01%7CEmily.Deng%40amd.com%7C3c77bba4d40d4bc6b >e8508d78526dd45%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C >637124274794842900&sdata=vHNAs2FTkSpHYZ2TTux%2F66attN4lf5qSiP >jnlBOM5y0%3D&reserved=0 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx