If GPU hang, driver will fail to flush tlb, return the hang error to callers, make callers have a chance to handle the error. Signed-off-by: Dennis Li <dennis.li@xxxxxxx> Change-Id: Ie305ad0a77675f6eab7d5b8f68e279b7f4e7a8b9 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index e9b96ad3d9a5..18e243183b5e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1488,7 +1488,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, peer_pdd = kfd_get_process_device_data(peer, p); if (WARN_ON_ONCE(!peer_pdd)) continue; - kfd_flush_tlb(peer_pdd); + err = kfd_flush_tlb(peer_pdd); } kfree(devices_arr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 21eb0998c4ae..d636cbf7d32f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -263,6 +263,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, struct queue *q) { int allocated_vmid = -1, i; + int ret = 0; for (i = dqm->dev->vm_info.first_vmid_kfd; i <= dqm->dev->vm_info.last_vmid_kfd; i++) { @@ -295,13 +296,26 @@ static int allocate_vmid(struct device_queue_manager *dqm, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd)); + ret = kfd_flush_tlb(qpd_to_pdd(qpd)); + if (ret) { + pr_err("Failed to flush tlb\n"); + goto pro_failed; + } if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); return 0; + +pro_failed: + /* Release the vmid mapping */ + set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + dqm->vmid_pasid[qpd->vmid] = 0; + + qpd->vmid = 0; + q->properties.vmid = 0; + return ret; } static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, @@ -326,12 +340,17 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { + int ret = 0; + /* On GFX v7, CP doesn't flush TC at dequeue */ if (q->device->device_info->asic_family == CHIP_HAWAII) if (flush_texture_cache_nocpsch(q->device, qpd)) pr_err("Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd)); + ret = kfd_flush_tlb(qpd_to_pdd(qpd)); + if (ret) { + pr_err("Failed to flush tlb\n"); + } /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -795,7 +814,9 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->kgd, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd); + ret = kfd_flush_tlb(pdd); + if (ret) + goto out; } /* Take a safe reference to the mm_struct, which may otherwise diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 51ba2020732e..31ea72946d06 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1081,7 +1081,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, void kfd_signal_reset_event(struct kfd_dev *dev); -void kfd_flush_tlb(struct kfd_process_device *pdd); +int kfd_flush_tlb(struct kfd_process_device *pdd); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8616a204e4c3..3919cc88813c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1444,21 +1444,24 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); } -void kfd_flush_tlb(struct kfd_process_device *pdd) +int kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; + int ret = 0; if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. */ if (pdd->qpd.vmid) - amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + ret = amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, pdd->qpd.vmid); } else { - amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + ret = amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, pdd->process->pasid); } + + return ret; } #if defined(CONFIG_DEBUG_FS) -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx