[PATCH 6.12 057/172] drm/amdkfd: pause autosuspend when creating pdd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



6.12-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Jesse.zhang@xxxxxxx <Jesse.zhang@xxxxxxx>

commit 438b39ac74e2a9dc0a5c9d653b7d8066877e86b1 upstream.

When using MES creating a pdd will require talking to the GPU to
setup the relevant context. The code here forgot to wake up the GPU
in case it was in suspend, this causes KVM to EFAULT for passthrough
GPU for example. This issue can be masked if the GPU was woken up by
other things (e.g. opening the KMS node) first and have not yet gone to sleep.

v4: do the allocation of proc_ctx_bo in a lazy fashion
when the first queue is created in a process (Felix)

Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx>
Reviewed-by: Yunxiang Li <Yunxiang.Li@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |   15 +++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c              |   23 +-----------------
 2 files changed, 17 insertions(+), 21 deletions(-)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -205,6 +205,21 @@ static int add_queue_mes(struct device_q
 	if (!down_read_trylock(&adev->reset_domain->sem))
 		return -EIO;
 
+	if (!pdd->proc_ctx_cpu_ptr) {
+		r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+				AMDGPU_MES_PROC_CTX_SIZE,
+				&pdd->proc_ctx_bo,
+				&pdd->proc_ctx_gpu_addr,
+				&pdd->proc_ctx_cpu_ptr,
+				false);
+		if (r) {
+			dev_err(adev->dev,
+				"failed to allocate process context bo\n");
+			return r;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
 	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
 	queue_input.process_id = qpd->pqm->process->pasid;
 	queue_input.page_table_base_addr =  qpd->page_table_base;
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(str
 
 		kfd_free_process_doorbells(pdd->dev->kfd, pdd);
 
-		if (pdd->dev->kfd->shared_resources.enable_mes)
+		if (pdd->dev->kfd->shared_resources.enable_mes &&
+			pdd->proc_ctx_cpu_ptr)
 			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
 						   &pdd->proc_ctx_bo);
 		/*
@@ -1610,7 +1611,6 @@ struct kfd_process_device *kfd_create_pr
 							struct kfd_process *p)
 {
 	struct kfd_process_device *pdd = NULL;
-	int retval = 0;
 
 	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
 		return NULL;
@@ -1634,21 +1634,6 @@ struct kfd_process_device *kfd_create_pr
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
 
-	if (dev->kfd->shared_resources.enable_mes) {
-		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
-						AMDGPU_MES_PROC_CTX_SIZE,
-						&pdd->proc_ctx_bo,
-						&pdd->proc_ctx_gpu_addr,
-						&pdd->proc_ctx_cpu_ptr,
-						false);
-		if (retval) {
-			dev_err(dev->adev->dev,
-				"failed to allocate process context bo\n");
-			goto err_free_pdd;
-		}
-		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
-	}
-
 	p->pdds[p->n_pdds++] = pdd;
 	if (kfd_dbg_is_per_vmid_supported(pdd->dev))
 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1660,10 +1645,6 @@ struct kfd_process_device *kfd_create_pr
 	idr_init(&pdd->alloc_idr);
 
 	return pdd;
-
-err_free_pdd:
-	kfree(pdd);
-	return NULL;
 }
 
 /**






[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux