From: Wyatt Wood <wyatt.wood@xxxxxxx> [Why] Running into bugchecks during stress test where rptr is 0xFFFFFFFF. Typically this is caused by a hard hang, and can come from HW outside of DCN. [How] To prevent bugchecks when writing the DMUB rptr, fist check that the rptr is valid. Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@xxxxxxx> Acked-by: Solomon Chiu <solomon.chiu@xxxxxxx> Signed-off-by: Wyatt Wood <wyatt.wood@xxxxxxx> --- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 1 + drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index ef324fc39315..efb667cf6c98 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -84,6 +84,7 @@ enum dmub_status { DMUB_STATUS_QUEUE_FULL, DMUB_STATUS_TIMEOUT, DMUB_STATUS_INVALID, + DMUB_STATUS_HW_FAILURE, }; /* enum dmub_asic - dmub asic identifier */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index a6188d067d65..77c67222cabd 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -655,13 +655,19 @@ enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, uint32_t timeout_us) { - uint32_t i; + uint32_t i, rptr; if (!dmub->hw_init) return DMUB_STATUS_INVALID; for (i = 0; i <= timeout_us; ++i) { - dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + + if (rptr > dmub->inbox1_rb.capacity) + return DMUB_STATUS_HW_FAILURE; + + dmub->inbox1_rb.rptr = rptr; + if (dmub_rb_empty(&dmub->inbox1_rb)) return DMUB_STATUS_OK; -- 2.25.1