Update amdgv_sriovmsg.h and mxgpu_nv.h to add new definitions for CPER support on VFs. PMFW ACA messages are not available on VFs, and VFs must query CPERs from host. Signed-off-by: Tony Yi <Tony.Yi@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 40 ++++++++++++++------- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 2 ++ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index b4f9c2f4e92c..d6ac2652f0ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -97,11 +97,12 @@ union amd_sriov_msg_feature_flags { uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; - uint32_t vcn_rb_decouple : 1; + uint32_t vcn_rb_decouple : 1; uint32_t mes_info_dump_enable : 1; uint32_t ras_caps : 1; uint32_t ras_telemetry : 1; - uint32_t reserved : 21; + uint32_t ras_cper : 1; + uint32_t reserved : 20; } flags; uint32_t all; }; @@ -328,21 +329,25 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_READY_TO_RESET = 201, MB_REQ_MSG_RAS_POISON = 202, MB_REQ_RAS_ERROR_COUNT = 203, + MB_REQ_RAS_CPER_DUMP = 204, }; /* mailbox message send from host to guest */ enum amd_sriov_mailbox_response_message { - MB_RES_MSG_CLR_MSG_BUF = 0, - MB_RES_MSG_READY_TO_ACCESS_GPU = 1, - MB_RES_MSG_FLR_NOTIFICATION, - MB_RES_MSG_FLR_NOTIFICATION_COMPLETION, - MB_RES_MSG_SUCCESS, - MB_RES_MSG_FAIL, - MB_RES_MSG_QUERY_ALIVE, - MB_RES_MSG_GPU_INIT_DATA_READY, - MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, - - MB_RES_MSG_TEXT_MESSAGE = 255 + MB_RES_MSG_CLR_MSG_BUF = 0, + MB_RES_MSG_READY_TO_ACCESS_GPU = 1, + MB_RES_MSG_FLR_NOTIFICATION = 2, + MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3, + MB_RES_MSG_SUCCESS = 4, + MB_RES_MSG_FAIL = 5, + MB_RES_MSG_QUERY_ALIVE = 6, + MB_RES_MSG_GPU_INIT_DATA_READY = 7, + MB_RES_MSG_RAS_POISON_READY = 8, + MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9, + MB_RES_MSG_GPU_RMA = 10, + MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, + MB_REQ_RAS_CPER_DUMP_READY = 14, + MB_RES_MSG_TEXT_MESSAGE = 255 }; enum amd_sriov_ras_telemetry_gpu_block { @@ -386,11 +391,20 @@ struct amd_sriov_ras_telemetry_error_count { } block[RAS_TELEMETRY_GPU_BLOCK_COUNT]; }; +struct amd_sriov_ras_cper_dump { + uint32_t more; + uint64_t overflow_count; + uint64_t count; + uint64_t wptr; + uint32_t buf[]; +}; + struct amdsriov_ras_telemetry { struct amd_sriov_ras_telemetry_header header; union { struct amd_sriov_ras_telemetry_error_count error_count; + struct amd_sriov_ras_cper_dump cper_dump; } body; }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 9d61d76e1bf9..72c9fceb9d79 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -41,6 +41,7 @@ enum idh_request { IDH_READY_TO_RESET = 201, IDH_RAS_POISON = 202, IDH_REQ_RAS_ERROR_COUNT = 203, + IDH_REQ_RAS_CPER_DUMP = 204, }; enum idh_event { @@ -56,6 +57,7 @@ enum idh_event { IDH_PF_SOFT_FLR_NOTIFICATION, IDH_RAS_ERROR_DETECTED, IDH_RAS_ERROR_COUNT_READY = 11, + IDH_RAS_CPER_DUMP_READY = 14, IDH_TEXT_MESSAGE = 255, }; -- 2.34.1