Hi Alex, On 02/04/2024 02:42, Liu, Shaoyun wrote:
[AMD Official Use Only - General] [AMD Official Use Only - General] Comments inline -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Alex Deucher Sent: Saturday, March 30, 2024 10:01 AM To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx> Subject: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers Makes it easier to review the logs when there are MES errors. Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 65 ++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 072c478665ade..73a4bb0f5ba0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -100,19 +100,51 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { .insert_nop = amdgpu_ring_insert_nop, }; +static const char *mes_v11_0_opcodes[] = { + "MES_SCH_API_SET_HW_RSRC", + "MES_SCH_API_SET_SCHEDULING_CONFIG", + "MES_SCH_API_ADD_QUEUE" + "MES_SCH_API_REMOVE_QUEUE" + "MES_SCH_API_PERFORM_YIELD" + "MES_SCH_API_SET_GANG_PRIORITY_LEVEL" + "MES_SCH_API_SUSPEND" + "MES_SCH_API_RESUME" + "MES_SCH_API_RESET" + "MES_SCH_API_SET_LOG_BUFFER" + "MES_SCH_API_CHANGE_GANG_PRORITY" + "MES_SCH_API_QUERY_SCHEDULER_STATUS" + "MES_SCH_API_PROGRAM_GDS" + "MES_SCH_API_SET_DEBUG_VMID" + "MES_SCH_API_MISC" + "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE" + "MES_SCH_API_AMD_LOG" +}; + +static const char *mes_v11_0_misc_opcodes[] = { + "MESAPI_MISC__WRITE_REG", + "MESAPI_MISC__INV_GART", + "MESAPI_MISC__QUERY_STATUS", + "MESAPI_MISC__READ_REG", + "MESAPI_MISC__WAIT_REG_MEM", + "MESAPI_MISC__SET_SHADER_DEBUGGER", +}; + static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, void *pkt, int size, int api_status_off) { int ndw = size / 4; signed long r; - union MESAPI__ADD_QUEUE *x_pkt = pkt; + union MESAPI__MISC 
*x_pkt = pkt; struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; unsigned long flags; signed long timeout = adev->usec_timeout; + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; + if (amdgpu_emu_mode) { timeout *= 100; } else if (amdgpu_sriov_vf(adev)) { @@ -135,13 +167,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_ring_commit(ring); spin_unlock_irqrestore(&mes->ring_lock, flags); - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); + if (x_pkt->header.opcode == MES_SCH_API_MISC) { + if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)) + dev_err(adev->dev, "MES msg=%s (%s) was emitted\n", [shaoyunl] Shouldn't we use DRM_DEBUG for valid condition? Regards Shaoyun.liu + mes_v11_0_opcodes[x_pkt->header.opcode], + mes_v11_0_misc_opcodes[x_pkt->opcode]); + else + dev_err(adev->dev, "MES msg=%s (%d) was emitted\n", + mes_v11_0_opcodes[x_pkt->header.opcode], + x_pkt->opcode); + } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes)) + dev_err(adev->dev, "MES msg=%s was emitted\n", + mes_v11_0_opcodes[x_pkt->header.opcode]); + else + dev_err(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); if (r < 1) { - DRM_ERROR("MES failed to response msg=%d\n", - x_pkt->header.opcode); + if (x_pkt->header.opcode == MES_SCH_API_MISC) { + if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)) + dev_err(adev->dev, "MES failed to response msg=%s (%s)\n", + mes_v11_0_opcodes[x_pkt->header.opcode], + mes_v11_0_misc_opcodes[x_pkt->opcode]); + else + dev_err(adev->dev, "MES failed to response msg=%s (%d)\n", + mes_v11_0_opcodes[x_pkt->header.opcode], x_pkt->opcode); + } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes)) + dev_err(adev->dev, "MES failed to response msg=%s\n", + mes_v11_0_opcodes[x_pkt->header.opcode]); + else + dev_err(adev->dev, "MES failed 
to response msg=%d\n", + x_pkt->header.opcode);
Please consider this small reformatting here for better readability:

static const char *
amdgpu_mes_find_op_name(union MESAPI__MISC *x_pkt)
{
    const char *op_name = NULL;

    if (x_pkt->header.opcode == MES_SCH_API_MISC) {
        if (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes))
            op_name = mes_v11_0_misc_opcodes[x_pkt->opcode];
    } else {
        if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
            op_name = mes_v11_0_opcodes[x_pkt->header.opcode];
    }

    return op_name;
}

    op_name = amdgpu_mes_find_op_name(x_pkt);
    if (op_name)
        DRM_DEBUG_DRIVER("MES msg=%s was emitted\n", op_name);
    else
        DRM_DEBUG_DRIVER("MES msg=%d was emitted\n", x_pkt->header.opcode);

    r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
    if (r < 1) {
        if (op_name)
            dev_err(adev->dev, "MES failed to response msg=%s\n", op_name);
        else
            dev_err(adev->dev, "MES failed to response msg=%d\n", x_pkt->header.opcode);
    }

- Shashank
while (halt_if_hws_hang) schedule(); -- 2.44.0