Create/destroy the GPUVM context during PDD creation/destruction. Get
the VM page table base and program it during process registration (HWS)
or VMID allocation (non-HWS).

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h             | 13 +++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c          | 33 ++++++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1a28dc2..b7d0639 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -129,6 +129,15 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
 	program_sh_mem_settings(dqm, qpd);
 
+	/* qpd->page_table_base is set earlier when register_process()
+	 * is called, i.e. when the first queue is created.
+	 */
+	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
+			qpd->vmid,
+			qpd->page_table_base);
+	/* invalidate the VM context after pasid and vmid mapping is set up */
+	kfd_flush_tlb(qpd_to_pdd(qpd));
+
 	return 0;
 }
 
@@ -138,6 +147,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 {
 	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
 
+	kfd_flush_tlb(qpd_to_pdd(qpd));
+
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
 
@@ -450,6 +461,8 @@ static int register_process(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
 	struct device_process_node *n;
+	struct kfd_process_device *pdd;
+	uint32_t pd_base;
 	int retval;
 
 	n = kzalloc(sizeof(*n), GFP_KERNEL);
@@ -458,9 +471,16 @@ static int register_process(struct device_queue_manager *dqm,
 
 	n->qpd = qpd;
 
+	pdd = qpd_to_pdd(qpd);
+	/* Retrieve PD base */
+	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
+
 	mutex_lock(&dqm->lock);
 	list_add(&n->list, &dqm->queues);
 
+	/* Update PD Base in QPD */
+	qpd->page_table_base = pd_base;
+
 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
 
 	dqm->processes_count++;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index daaa114..0687161 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -521,6 +521,9 @@ struct kfd_process_device {
 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
 	enum kfd_pdd_bound bound;
 
+	/* VM context for GPUVM allocations */
+	void *vm;
+
 	/* Flag used to tell the pdd has dequeued from the dqm.
 	 * This is used to prevent dev->dqm->ops.process_termination() from
 	 * being called twice when it is already called in IOMMU callback
@@ -589,6 +592,14 @@ struct kfd_process {
 	size_t signal_mapped_size;
 	size_t signal_event_count;
 	bool signal_event_limit_reached;
+
+	/* Information used for memory eviction */
+	void *kgd_process_info;
+	/* Eviction fence that is attached to all the BOs of this process. The
+	 * fence will be triggered during eviction and a new one will be
+	 * created during restore.
+	 */
+	struct dma_fence *ef;
 };
 
 /**
@@ -805,6 +816,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		     uint64_t *event_page_offset, uint32_t *event_slot_index);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
+void kfd_flush_tlb(struct kfd_process_device *pdd);
+
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
 /* Debugfs */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a527c22..e82f4ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -34,6 +34,7 @@
 struct mm_struct;
 
 #include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
 
 /*
@@ -154,6 +155,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
 				pdd->dev->id, p->pasid);
 
+		if (pdd->vm)
+			pdd->dev->kfd2kgd->destroy_process_vm(
+				pdd->dev->kgd, pdd->vm);
+
 		list_del(&pdd->per_device_list);
 
 		if (pdd->qpd.cwsr_kaddr)
@@ -186,6 +191,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 #endif
 
 	kfd_process_destroy_pdds(p);
+	dma_fence_put(p->ef);
 
 	kfd_event_free_process(p);
 
@@ -410,7 +416,18 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->already_dequeued = false;
 	list_add(&pdd->per_device_list, &p->per_device_data);
 
+	/* Create the GPUVM context for this specific device */
+	if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
+					    &p->kgd_process_info, &p->ef)) {
+		pr_err("Failed to create process VM object\n");
+		goto err_create_pdd;
+	}
 	return pdd;
+
+err_create_pdd:
+	list_del(&pdd->per_device_list);
+	kfree(pdd);
+	return NULL;
 }
 
 /*
@@ -642,6 +659,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
+void kfd_flush_tlb(struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
+
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		/* Nothing to flush until a VMID is assigned, which
+		 * only happens when the first queue is created.
+		 */
+		if (pdd->qpd.vmid)
+			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+	} else {
+		f2g->invalidate_tlbs(pdd->dev->kgd, pdd->process->pasid);
+	}
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
-- 
2.7.4
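A note on the intended ordering, for readers of the patch: the stand-alone
sketch below only illustrates the flow the patch establishes (GPUVM context
created together with the PDD, page directory base cached when the process is
registered on first queue creation, and, on the non-HWS path, programmed for
the allocated VMID followed by a TLB flush; with HWS the cached base is used
at process registration instead). It is not KFD code, and all toy_* names are
hypothetical stand-ins.

/*
 * Illustrative model of the PDD / GPUVM lifetime added by this patch.
 * Hypothetical stand-in types and helpers only; compile with a normal
 * user-space C compiler to follow the ordering.
 */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

struct toy_pdd {			/* stands in for kfd_process_device */
	void *vm;			/* GPUVM context, created with the PDD */
	uint32_t page_table_base;	/* stands in for qpd->page_table_base */
	unsigned int vmid;		/* 0 until a VMID is allocated (non-HWS) */
};

/* stand-ins for kfd2kgd->create_process_vm() / get_process_page_dir() */
static int toy_create_process_vm(void **vm)
{
	static int dummy;

	*vm = &dummy;
	return 0;
}

static uint32_t toy_get_process_page_dir(void *vm)
{
	(void)vm;
	return 0x1234;			/* made-up page directory base */
}

/* PDD creation: the GPUVM context is created here */
static int toy_create_pdd(struct toy_pdd *pdd)
{
	return toy_create_process_vm(&pdd->vm);
}

/* Process registration (first queue): cache the PD base in the QPD */
static void toy_register_process(struct toy_pdd *pdd)
{
	pdd->page_table_base = toy_get_process_page_dir(pdd->vm);
}

/* Non-HWS only: VMID allocation programs the PD base and flushes the TLB */
static void toy_allocate_vmid(struct toy_pdd *pdd, unsigned int vmid)
{
	pdd->vmid = vmid;
	printf("program VMID %u with PD base 0x%" PRIx32 ", then flush TLB\n",
	       vmid, pdd->page_table_base);
}

int main(void)
{
	struct toy_pdd pdd = { 0 };

	if (toy_create_pdd(&pdd))	/* PDD creation */
		return 1;
	toy_register_process(&pdd);	/* first queue created */
	toy_allocate_vmid(&pdd, 8);	/* non-HWS path only */
	return 0;
}

The sketch also makes the teardown ordering easier to see: because the GPUVM
context lives as long as the PDD, destroy_process_vm() in
kfd_process_destroy_pdds() is the natural counterpart of create_process_vm()
in kfd_create_process_device_data().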