Create/destroy the GPUVM context during PDD creation/destruction. Get
the VM page table base and program it during process registration (HWS)
or VMID allocation (non-HWS).

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h             | 13 +++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c          | 33 ++++++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1a28dc2..b7d0639 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -129,6 +129,15 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
 	program_sh_mem_settings(dqm, qpd);
 
+	/* qpd->page_table_base is set earlier when register_process()
+	 * is called, i.e. when the first queue is created.
+	 */
+	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
+			qpd->vmid,
+			qpd->page_table_base);
+	/* invalidate the VM context after pasid and vmid mapping is set up */
+	kfd_flush_tlb(qpd_to_pdd(qpd));
+
 	return 0;
 }
 
@@ -138,6 +147,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 {
 	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
 
+	kfd_flush_tlb(qpd_to_pdd(qpd));
+
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
 
@@ -450,6 +461,8 @@ static int register_process(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
 	struct device_process_node *n;
+	struct kfd_process_device *pdd;
+	uint32_t pd_base;
 	int retval;
 
 	n = kzalloc(sizeof(*n), GFP_KERNEL);
@@ -458,9 +471,16 @@ static int register_process(struct device_queue_manager *dqm,
 
 	n->qpd = qpd;
 
+	pdd = qpd_to_pdd(qpd);
+	/* Retrieve PD base */
+	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
+
 	mutex_lock(&dqm->lock);
 	list_add(&n->list, &dqm->queues);
 
+	/* Update PD Base in QPD */
+	qpd->page_table_base = pd_base;
+
 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
 
 	dqm->processes_count++;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index daaa114..0687161 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -521,6 +521,9 @@ struct kfd_process_device {
 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
 	enum kfd_pdd_bound bound;
 
+	/* VM context for GPUVM allocations */
+	void *vm;
+
 	/* Flag used to tell the pdd has dequeued from the dqm.
 	 * This is used to prevent dev->dqm->ops.process_termination() from
 	 * being called twice when it is already called in IOMMU callback
@@ -589,6 +592,14 @@ struct kfd_process {
 	size_t signal_mapped_size;
 	size_t signal_event_count;
 	bool signal_event_limit_reached;
+
+	/* Information used for memory eviction */
+	void *kgd_process_info;
+	/* Eviction fence that is attached to all the BOs of this process. The
+	 * fence will be triggered during eviction and a new one will be
+	 * created during restore.
+	 */
+	struct dma_fence *ef;
 };
 
 /**
@@ -805,6 +816,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		     uint64_t *event_page_offset, uint32_t *event_slot_index);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
+void kfd_flush_tlb(struct kfd_process_device *pdd);
+
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
 /* Debugfs */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a527c22..e82f4ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -34,6 +34,7 @@
 struct mm_struct;
 
 #include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
 
 /*
@@ -154,6 +155,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
 				pdd->dev->id, p->pasid);
 
+		if (pdd->vm)
+			pdd->dev->kfd2kgd->destroy_process_vm(
+				pdd->dev->kgd, pdd->vm);
+
 		list_del(&pdd->per_device_list);
 
 		if (pdd->qpd.cwsr_kaddr)
@@ -186,6 +191,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 #endif
 
 	kfd_process_destroy_pdds(p);
+	dma_fence_put(p->ef);
 
 	kfd_event_free_process(p);
 
@@ -410,7 +416,18 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->already_dequeued = false;
 	list_add(&pdd->per_device_list, &p->per_device_data);
 
+	/* Create the GPUVM context for this specific device */
+	if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
+					    &p->kgd_process_info, &p->ef)) {
+		pr_err("Failed to create process VM object\n");
+		goto err_create_pdd;
+	}
 	return pdd;
+
+err_create_pdd:
+	list_del(&pdd->per_device_list);
+	kfree(pdd);
+	return NULL;
 }
 
 /*
@@ -642,6 +659,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
+void kfd_flush_tlb(struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
+
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		/* Nothing to flush until a VMID is assigned, which
+		 * only happens when the first queue is created.
+		 */
+		if (pdd->qpd.vmid)
+			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+	} else {
+		f2g->invalidate_tlbs(pdd->dev->kgd, pdd->process->pasid);
+	}
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
-- 
2.7.4
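A note on the intended ordering, for readers of the patch: the stand-alone
sketch below only illustrates the flow the patch establishes (GPUVM context
created together with the PDD, page directory base cached when the process is
registered on first queue creation, and, on the non-HWS path, programmed for
the allocated VMID followed by a TLB flush; with HWS the cached base is used
at process registration instead). It is not KFD code, and all toy_* names are
hypothetical stand-ins.

/*
 * Illustrative model of the PDD / GPUVM lifetime added by this patch.
 * Hypothetical stand-in types and helpers only; compile with a normal
 * user-space C compiler to follow the ordering.
 */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

struct toy_pdd {			/* stands in for kfd_process_device */
	void *vm;			/* GPUVM context, created with the PDD */
	uint32_t page_table_base;	/* stands in for qpd->page_table_base */
	unsigned int vmid;		/* 0 until a VMID is allocated (non-HWS) */
};

/* stand-ins for kfd2kgd->create_process_vm() / get_process_page_dir() */
static int toy_create_process_vm(void **vm)
{
	static int dummy;

	*vm = &dummy;
	return 0;
}

static uint32_t toy_get_process_page_dir(void *vm)
{
	(void)vm;
	return 0x1234;			/* made-up page directory base */
}

/* PDD creation: the GPUVM context is created here */
static int toy_create_pdd(struct toy_pdd *pdd)
{
	return toy_create_process_vm(&pdd->vm);
}

/* Process registration (first queue): cache the PD base in the QPD */
static void toy_register_process(struct toy_pdd *pdd)
{
	pdd->page_table_base = toy_get_process_page_dir(pdd->vm);
}

/* Non-HWS only: VMID allocation programs the PD base and flushes the TLB */
static void toy_allocate_vmid(struct toy_pdd *pdd, unsigned int vmid)
{
	pdd->vmid = vmid;
	printf("program VMID %u with PD base 0x%" PRIx32 ", then flush TLB\n",
	       vmid, pdd->page_table_base);
}

int main(void)
{
	struct toy_pdd pdd = { 0 };

	if (toy_create_pdd(&pdd))	/* PDD creation */
		return 1;
	toy_register_process(&pdd);	/* first queue created */
	toy_allocate_vmid(&pdd, 8);	/* non-HWS path only */
	return 0;
}

The sketch also makes the teardown ordering easier to see: because the GPUVM
context lives as long as the PDD, destroy_process_vm() in
kfd_process_destroy_pdds() is the natural counterpart of create_process_vm()
in kfd_create_process_device_data().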