On Thu, Oct 26, 2017 at 1:41 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > From: Yair Shachar <yair.shachar at amd.com> > > Take the dbgmgr lock and unregister before destroying the debug manager. > Do this before destroying the queues. > > Signed-off-by: Yair Shachar <yair.shachar at amd.com> > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 37 +++++++++++++++++++++++--------- > 1 file changed, 27 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 5d93a5c..6caf6df 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -229,17 +229,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, > > mutex_lock(&p->mutex); > > + /* Iterate over all process device data structures and if the > + * pdd is in debug mode, we should first force unregistration, > + * then we will be able to destroy the queues > + */ > + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { > + struct kfd_dev *dev = pdd->dev; > + > + mutex_lock(kfd_get_dbgmgr_mutex()); > + if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { > + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { > + kfd_dbgmgr_destroy(dev->dbgmgr); > + dev->dbgmgr = NULL; > + } > + } > + mutex_unlock(kfd_get_dbgmgr_mutex()); > + } > + > kfd_process_dequeue_from_all_devices(p); > pqm_uninit(&p->pqm); > > - /* Iterate over all process device data structure and check > - * if we should delete debug managers > - */ > - list_for_each_entry(pdd, &p->per_device_data, per_device_list) > - if ((pdd->dev->dbgmgr) && > - (pdd->dev->dbgmgr->pasid == p->pasid)) > - kfd_dbgmgr_destroy(pdd->dev->dbgmgr); > - > mutex_unlock(&p->mutex); > > /* > @@ -468,8 +477,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) > > pr_debug("Unbinding process %d from IOMMU\n", pasid); > > - if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) > - kfd_dbgmgr_destroy(dev->dbgmgr); > + mutex_lock(kfd_get_dbgmgr_mutex()); > + > + if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { > + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { > + kfd_dbgmgr_destroy(dev->dbgmgr); > + dev->dbgmgr = NULL; > + } > + } > + > + mutex_unlock(kfd_get_dbgmgr_mutex()); > > pdd = kfd_get_process_device_data(dev, p); > if (pdd) > -- > 2.7.4 > The patch is fine but to prevent theoretical deadlocks, I suggest to add to this patch a change in the order of locks at kfd_ioctl_dbg_register(), so that p->mutex is taken *before* dbgmgr mutex With that change, this patch is: Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com>