When using MES, creating a pdd requires talking to the GPU to set up the relevant context. The code here forgot to wake up the GPU in case it was suspended, which causes, for example, KVM to return EFAULT for a passthrough GPU. Also, change the other place where we pause suspend to use the cleaner pm_runtime_resume_and_get helper. Fixes: cc009e613de6 ("drm/amdkfd: Add KFD support for soc21 v3") Signed-off-by: Yunxiang Li <Yunxiang.Li@xxxxxxx> --- It is unclear to me whether kfd_process_destroy_pdds also has this problem, or whether freeing gtt mem is guaranteed to succeed even with the GPU suspended. drivers/gpu/drm/amd/amdkfd/kfd_process.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index c334432e55b14..618afec5caf8f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1633,6 +1633,11 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, atomic64_set(&pdd->evict_duration_counter, 0); if (dev->kfd->shared_resources.enable_mes) { + retval = pm_runtime_resume_and_get(bdev); + if (retval < 0) { + pr_err("failed to stop autosuspend\n"); + goto err_free_pdd; + } retval = amdgpu_amdkfd_alloc_gtt_mem(adev, AMDGPU_MES_PROC_CTX_SIZE, &pdd->proc_ctx_bo, @@ -1642,6 +1647,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, retval = amdgpu_amdkfd_alloc_gtt_mem( adev, AMDGPU_MES_PROC_CTX_SIZE, &pdd->proc_ctx_bo, &pdd->proc_ctx_gpu_addr, &pdd->proc_ctx_cpu_ptr, false); + pm_runtime_mark_last_busy(bdev); + pm_runtime_put_autosuspend(bdev); if (retval) { dev_err(bdev, "failed to allocate process context bo\n"); @@ -1771,11 +1778,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev, * pdd is destroyed. 
*/ if (!pdd->runtime_inuse) { - err = pm_runtime_get_sync(bdev); - if (err < 0) { - pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev); + err = pm_runtime_resume_and_get(bdev); + if (err < 0) return ERR_PTR(err); - } } /* -- 2.34.1