On Wed, Sep 27, 2017 at 7:09 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> Separate device queue termination from process queue manager
> termination. Unmap all queues at once instead of one at a time.
> Unmap device queues before the PASID is unbound, in the
> kfd_process_iommu_unbind_callback.
>
> When resetting wavefronts in non-HWS mode, do it before the VMID is
> released.
>
> Signed-off-by: Ben Goz <ben.goz at amd.com>
> Signed-off-by: shaoyun liu <shaoyun.liu at amd.com>
> Signed-off-by: Amber Lin <Amber.Lin at amd.com>
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 187 ++++++++++++++++-----
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |   5 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  18 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c           |  36 ++--
>  .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  37 ++--
>  5 files changed, 200 insertions(+), 83 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 0f2a756..d0e7288 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -295,65 +295,73 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>  	return retval;
>  }
>
> -static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
> +/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
> + * to avoid asynchronized access
> + */
> +static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
>  				struct qcm_process_device *qpd,
>  				struct queue *q)
>  {
>  	int retval;
>  	struct mqd_manager *mqd;
>
> -	retval = 0;
> -
> -	mutex_lock(&dqm->lock);
> +	mqd = dqm->ops.get_mqd_manager(dqm,
> +		get_mqd_type_from_queue_type(q->properties.type));
> +	if (!mqd)
> +		return -ENOMEM;
>
> -	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
> -		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
> -		if (mqd == NULL) {
> -			retval = -ENOMEM;
> -			goto out;
> -		}
> +	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>  		deallocate_hqd(dqm, q);
> -	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
> -		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
> -		if (mqd == NULL) {
> -			retval = -ENOMEM;
> -			goto out;
> -		}
> +	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
>  		dqm->sdma_queue_count--;
>  		deallocate_sdma_queue(dqm, q->sdma_id);
>  	} else {
>  		pr_debug("q->properties.type %d is invalid\n",
>  				q->properties.type);
> -		retval = -EINVAL;
> -		goto out;
> +		return -EINVAL;
>  	}
> +	dqm->total_queue_count--;
>
>  	retval = mqd->destroy_mqd(mqd, q->mqd,
>  				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
>  				KFD_UNMAP_LATENCY_MS,
>  				q->pipe, q->queue);
> -
> -	if (retval)
> -		goto out;
> +	if (retval == -ETIME)
> +		qpd->reset_wavefronts = true;
>
>  	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
>
>  	list_del(&q->list);
> -	if (list_empty(&qpd->queues_list))
> +	if (list_empty(&qpd->queues_list)) {
> +		if (qpd->reset_wavefronts) {
> +			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
> +					dqm->dev);
> +			/* dbgdev_wave_reset_wavefronts has to be called before
> +			 * deallocate_vmid(), i.e. when vmid is still in use.
> +			 */
> +			dbgdev_wave_reset_wavefronts(dqm->dev,
> +					qpd->pqm->process);
> +			qpd->reset_wavefronts = false;
> +		}
> +
>  		deallocate_vmid(dqm, qpd, q);
> +	}
>  	if (q->properties.is_active)
>  		dqm->queue_count--;
>
> -	/*
> -	 * Unconditionally decrement this counter, regardless of the queue's
> -	 * type
> -	 */
> -	dqm->total_queue_count--;
> -	pr_debug("Total of %d queues are accountable so far\n",
> -			dqm->total_queue_count);
> +	return retval;
> +}
>
> -out:
> +static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
> +				struct qcm_process_device *qpd,
> +				struct queue *q)
> +{
> +	int retval;
> +
> +	mutex_lock(&dqm->lock);
> +	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
>  	mutex_unlock(&dqm->lock);
> +
>  	return retval;
>  }
>
> @@ -919,10 +927,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>  				enum kfd_unmap_queues_filter filter,
>  				uint32_t filter_param)
>  {
> -	int retval;
> -	struct kfd_process_device *pdd;
> -
> -	retval = 0;
> +	int retval = 0;
>
>  	if (!dqm->active_runlist)
>  		return retval;
> @@ -946,12 +951,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>  	/* should be timed out */
>  	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
>  				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
> -	if (retval) {
> -		pdd = kfd_get_process_device_data(dqm->dev,
> -				kfd_get_process(current));
> -		pdd->reset_wavefronts = true;
> +	if (retval)
>  		return retval;
> -	}
> +
>  	pm_release_ib(&dqm->packets);
>  	dqm->active_runlist = false;
>
> @@ -1017,7 +1019,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>  	if (q->properties.is_active)
>  		dqm->queue_count--;
>
> -	execute_queues_cpsch(dqm, false);
> +	retval = execute_queues_cpsch(dqm, false);
> +	if (retval == -ETIME)
> +		qpd->reset_wavefronts = true;
>
>  	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
>
> @@ -1107,6 +1111,107 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
>  	return retval;
>  }
>
> +static int process_termination_nocpsch(struct device_queue_manager *dqm,
> +		struct qcm_process_device *qpd)
> +{
> +	struct queue *q, *next;
> +	struct device_process_node *cur, *next_dpn;
> +	int retval = 0;
> +
> +	mutex_lock(&dqm->lock);
> +
> +	/* Clear all user mode queues */
> +	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
> +		int ret;
> +
> +		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
> +		if (ret)
> +			retval = ret;
> +	}
> +
> +	/* Unregister process */
> +	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
> +		if (qpd == cur->qpd) {
> +			list_del(&cur->list);
> +			kfree(cur);
> +			dqm->processes_count--;
> +			break;
> +		}
> +	}
> +
> +	mutex_unlock(&dqm->lock);
> +	return retval;
> +}
> +
> +
> +static int process_termination_cpsch(struct device_queue_manager *dqm,
> +		struct qcm_process_device *qpd)
> +{
> +	int retval;
> +	struct queue *q, *next;
> +	struct kernel_queue *kq, *kq_next;
> +	struct mqd_manager *mqd;
> +	struct device_process_node *cur, *next_dpn;
> +	bool unmap_static_queues = false;
> +
> +	retval = 0;
> +
> +	mutex_lock(&dqm->lock);
> +
> +	/* Clean all kernel queues */
> +	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
> +		list_del(&kq->list);
> +		dqm->queue_count--;
> +		qpd->is_debug = false;
> +		dqm->total_queue_count--;
> +		unmap_static_queues = true;
> +	}
> +
> +	/* Clear all user mode queues */
> +	list_for_each_entry(q, &qpd->queues_list, list) {
> +		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
> +			dqm->sdma_queue_count--;
> +
> +		if (q->properties.is_active)
> +			dqm->queue_count--;
> +
> +		dqm->total_queue_count--;
> +	}
> +
> +	/* Unregister process */
> +	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
> +		if (qpd == cur->qpd) {
> +			list_del(&cur->list);
> +			kfree(cur);
> +			dqm->processes_count--;
> +			break;
> +		}
> +	}
> +
> +	retval = execute_queues_cpsch(dqm, unmap_static_queues);
> +	if (retval || qpd->reset_wavefronts) {
> +		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
> +		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
> +		qpd->reset_wavefronts = false;
> +	}
> +
> +	/* lastly, free mqd resources */
> +	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
> +		mqd = dqm->ops.get_mqd_manager(dqm,
> +			get_mqd_type_from_queue_type(q->properties.type));
> +		if (!mqd) {
> +			retval = -ENOMEM;
> +			goto out;
> +		}
> +		list_del(&q->list);
> +		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
> +	}
> +
> +out:
> +	mutex_unlock(&dqm->lock);
> +	return retval;
> +}
> +
>  struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>  {
>  	struct device_queue_manager *dqm;
> @@ -1135,6 +1240,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>  		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
>  		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
>  		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
> +		dqm->ops.process_termination = process_termination_cpsch;
>  		break;
>  	case KFD_SCHED_POLICY_NO_HWS:
>  		/* initialize dqm for no cp scheduling */
> @@ -1149,6 +1255,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>  		dqm->ops.initialize = initialize_nocpsch;
>  		dqm->ops.uninitialize = uninitialize;
>  		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
> +		dqm->ops.process_termination = process_termination_nocpsch;
>  		break;
>  	default:
>  		pr_err("Invalid scheduling policy %d\n", sched_policy);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 60d46ce..31c2b1f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -77,6 +77,8 @@ struct device_process_node {
>   * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
>   * memory apertures.
>   *
> + * @process_termination: Clears all process queues belongs to that device.
> + *
>   */
>
>  struct device_queue_manager_ops {
> @@ -120,6 +122,9 @@ struct device_queue_manager_ops {
>  				enum cache_policy alternate_policy,
>  				void __user *alternate_aperture_base,
>  				uint64_t alternate_aperture_size);
> +
> +	int (*process_termination)(struct device_queue_manager *dqm,
> +			struct qcm_process_device *qpd);
>  };
>
>  struct device_queue_manager_asic_ops {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index b0a5bea..b936a12 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -424,6 +424,12 @@ struct qcm_process_device {
>  	unsigned int queue_count;
>  	unsigned int vmid;
>  	bool is_debug;
> +
> +	/* This flag tells if we should reset all wavefronts on
> +	 * process termination
> +	 */
> +	bool reset_wavefronts;
> +
>  	/*
>  	 * All the memory management data should be here too
>  	 */
> @@ -457,6 +463,8 @@ struct kfd_process_device {
>  	/* The device that owns this data. */
>  	struct kfd_dev *dev;
>
> +	/* The process that owns this kfd_process_device. */
> +	struct kfd_process *process;
>
>  	/* per-process-per device QCM data structure */
>  	struct qcm_process_device qpd;
> @@ -472,10 +480,12 @@ struct kfd_process_device {
>  	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
>  	enum kfd_pdd_bound bound;
>
> -	/* This flag tells if we should reset all
> -	 * wavefronts on process termination
> +	/* Flag used to tell the pdd has dequeued from the dqm.
> +	 * This is used to prevent dev->dqm->ops.process_termination() from
> +	 * being called twice when it is already called in IOMMU callback
> +	 * function.
>  	 */
> -	bool reset_wavefronts;
> +	bool already_dequeued;
>  };
>
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> @@ -657,6 +667,8 @@ struct process_queue_node {
>  	struct list_head process_queue_list;
>  };
>
> +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
> +void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
>  int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
>  void pqm_uninit(struct process_queue_manager *pqm);
>  int pqm_create_queue(struct process_queue_manager *pqm,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 0464c31..82ad037 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -171,9 +171,6 @@ static void kfd_process_wq_release(struct work_struct *work)
>  		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
>  				pdd->dev->id, p->pasid);
>
> -		if (pdd->reset_wavefronts)
> -			dbgdev_wave_reset_wavefronts(pdd->dev, p);
> -
>  		if (pdd->bound == PDD_BOUND)
>  			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
>
> @@ -236,24 +233,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
>
>  	mutex_lock(&p->mutex);
>
> -	/* In case our notifier is called before IOMMU notifier */
> +	kfd_process_dequeue_from_all_devices(p);
>  	pqm_uninit(&p->pqm);
>
>  	/* Iterate over all process device data structure and check
> -	 * if we should delete debug managers and reset all wavefronts
> +	 * if we should delete debug managers
>  	 */
> -	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> +	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
>  		if ((pdd->dev->dbgmgr) &&
>  				(pdd->dev->dbgmgr->pasid == p->pasid))
>  			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
>
> -		if (pdd->reset_wavefronts) {
> -			pr_warn("Resetting all wave fronts\n");
> -			dbgdev_wave_reset_wavefronts(pdd->dev, p);
> -			pdd->reset_wavefronts = false;
> -		}
> -	}
> -
>  	mutex_unlock(&p->mutex);
>
>  	/*
> @@ -362,8 +352,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>  		INIT_LIST_HEAD(&pdd->qpd.queues_list);
>  		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
>  		pdd->qpd.dqm = dev->dqm;
> -		pdd->reset_wavefronts = false;
> +		pdd->process = p;
>  		pdd->bound = PDD_UNBOUND;
> +		pdd->already_dequeued = false;
>  		list_add(&pdd->per_device_list, &p->per_device_data);
>  	}
>
> @@ -492,19 +483,12 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
>  	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
>  		kfd_dbgmgr_destroy(dev->dbgmgr);
>
> -	pqm_uninit(&p->pqm);
> -
>  	pdd = kfd_get_process_device_data(dev, p);
> -
> -	if (!pdd) {
> -		mutex_unlock(&p->mutex);
> -		return;
> -	}
> -
> -	if (pdd->reset_wavefronts) {
> -		dbgdev_wave_reset_wavefronts(pdd->dev, p);
> -		pdd->reset_wavefronts = false;
> -	}
> +	if (pdd)
> +		/* For GPU relying on IOMMU, we need to dequeue here
> +		 * when PASID is still bound.
> +		 */
> +		kfd_process_dequeue_from_device(pdd);
>
>  	mutex_unlock(&p->mutex);
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 68fe0d9..31ec3ca 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -63,6 +63,25 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
>  	return 0;
>  }
>
> +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
> +{
> +	struct kfd_dev *dev = pdd->dev;
> +
> +	if (pdd->already_dequeued)
> +		return;
> +
> +	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
> +	pdd->already_dequeued = true;
> +}
> +
> +void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
> +{
> +	struct kfd_process_device *pdd;
> +
> +	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
> +		kfd_process_dequeue_from_device(pdd);
> +}
> +
>  int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
>  {
>  	INIT_LIST_HEAD(&pqm->queues);
> @@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
>
>  void pqm_uninit(struct process_queue_manager *pqm)
>  {
> -	int retval;
>  	struct process_queue_node *pqn, *next;
>
>  	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
> -		retval = pqm_destroy_queue(
> -				pqm,
> -				(pqn->q != NULL) ?
> -				pqn->q->properties.queue_id :
> -				pqn->kq->queue->properties.queue_id);
> -
> -		if (retval != 0) {
> -			pr_err("failed to destroy queue\n");
> -			return;
> -		}
> +		uninit_queue(pqn->q);
> +		list_del(&pqn->process_queue_list);
> +		kfree(pqn);
>  	}
> +
>  	kfree(pqm->queue_slot_bitmap);
>  	pqm->queue_slot_bitmap = NULL;
>  }
> @@ -290,9 +302,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
>  	if (pqn->q) {
>  		dqm = pqn->q->device->dqm;
>  		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
> -		if (retval != 0)
> -			return retval;
> -
>  		uninit_queue(pqn->q);
>  	}
>
> --
> 2.7.4
>

Applied to -next

Thanks,
Oded