There are a couple of fixes required to enable gfx11 debugging. First, ADD_QUEUE.trap_en is an inappropriate place to toggle a per-process register so move it to SET_SHADER_DEBUGGER.trap_en. When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize the SET_SHADER_DEBUGGER.trap_en setting. Second, to preserve correct save/restore priviledged wave states in coordination with the trap enablement setting, resume suspended waves early in the disable call. NOTE: The AMDGPU_MES_VERSION_MASK check is a place holder as MES FW updates have been reviewed but is awaiting binary creation. Once the binaries have been created, this check may be subject to change. v2: do a trap_en safety check in case old mes doesn't accept unused trap_en d-word. remove unnecessary process termination work around. Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 4 +++- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 14 ++++++-------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 +++++++----- drivers/gpu/drm/amd/include/mes_v11_api_def.h | 1 + 7 files changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 20cc3fffe921..e9091ebfe230 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -928,7 +928,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr, uint32_t spi_gdbg_per_vmid_cntl, const uint32_t *tcp_watch_cntl, - uint32_t flags) + uint32_t flags, + bool trap_en) { struct mes_misc_op_input op_input = {0}; int r; @@ -945,6 +946,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); + if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> + AMDGPU_MES_API_VERSION_SHIFT) >= 14) + op_input.set_shader_debugger.trap_en = trap_en; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index b5f5eed2b5ef..2d6ac30b7135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -294,6 +294,7 @@ struct mes_misc_op_input { } flags; uint32_t spi_gdbg_per_vmid_cntl; uint32_t tcp_watch_cntl[4]; + uint32_t trap_en; } set_shader_debugger; }; }; @@ -361,7 +362,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr, uint32_t spi_gdbg_per_vmid_cntl, const uint32_t *tcp_watch_cntl, - uint32_t flags); + uint32_t flags, + bool trap_en); int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c4e3cb8d44de..1bdaa00c0b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -347,6 +347,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, input->set_shader_debugger.tcp_watch_cntl, sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); + misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; break; default: DRM_ERROR("unsupported misc op (%d) \n", input->op); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 125274445f43..cd34e7aaead4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -349,12 +349,13 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd) { uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; uint32_t flags = pdd->process->dbg_flags; + bool sq_trap_en = !!spi_dbg_cntl; if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) return 0; return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl, - pdd->watch_points, flags); + pdd->watch_points, flags, sq_trap_en); } #define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1 @@ -557,6 +558,10 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind if (!unwind) { uint32_t flags = 0; + int resume_count = resume_queues(target, 0, NULL); + + if (resume_count) + pr_debug("Resumed %d queues\n", resume_count); cancel_work_sync(&target->debug_event_workarea); kfd_dbg_clear_process_address_watch(target); @@ -598,13 +603,6 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind } kfd_dbg_set_workaround(target, false); - - if (!unwind) { - int resume_count = resume_queues(target, 0, NULL); - - if (resume_count) - pr_debug("Resumed %d queues\n", resume_count); - } } static void kfd_dbg_clean_exception_status(struct kfd_process *target) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0c1be91a87c6..ff0a28760494 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -227,8 +227,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.tba_addr = qpd->tba_addr; queue_input.tma_addr = qpd->tma_addr; queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || - KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0) || - q->properties.is_dbg_wa; + KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0); queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled; queue_type = convert_to_mes_queue_type(q->properties.type); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index faa7939f35bd..90b86a6ac7bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1863,13 +1863,15 @@ static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *de { bool firmware_supported = true; - /* - * FIXME: GFX11 FW currently not sufficient to deal with CWSR WA. - * Updated FW with API changes coming soon. - */ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { - firmware_supported = false; + uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & + AMDGPU_MES_API_VERSION_MASK) >> + AMDGPU_MES_API_VERSION_SHIFT; + uint32_t mes_rev = dev->gpu->adev->mes.sched_version & + AMDGPU_MES_VERSION_MASK; + + firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64); goto out; } diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index f3c15f18ddb5..0997e999416a 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -575,6 +575,7 @@ struct SET_SHADER_DEBUGGER { } flags; uint32_t spi_gdbg_per_vmid_cntl; uint32_t tcp_watch_cntl[4]; /* TCP_WATCHx_CNTL */ + uint32_t trap_en; }; union MESAPI__MISC { -- 2.25.1