To fix issue that ras controller interrupt cannot be triggered anymore after one time nbif uncorrectable error. And error count is stored in nbif ras object for query. Change-Id: Iba482c169fdff3e9c390072c0289a622a522133c Signed-off-by: Le Ma <le.ma@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 25231d6..9a3a65a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -52,6 +52,9 @@ #define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL #define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L +static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); + static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -314,6 +317,7 @@ static void nbio_v7_4_init_registers(struct amdgpu_device *adev) static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) { uint32_t bif_doorbell_intr_cntl; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if); bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); if (REG_GET_FIELD(bif_doorbell_intr_cntl, @@ -324,6 +328,12 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device RAS_CNTLR_INTERRUPT_CLEAR, 1); WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + /* + * clear error status after ras_controller_intr according to + * hw team and count ue number for query + */ + nbio_v7_4_query_ras_error_count(adev, &obj->err_data); + DRM_WARN("RAS controller interrupt triggered by NBIF error\n"); /* ras_controller_int is dedicated for nbif ras error, -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx