[AMD Official Use Only] > -----Original Message----- > From: Limonciello, Mario <Mario.Limonciello@xxxxxxx> > Sent: January 7, 2022 11:50 AM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Limonciello, Mario <Mario.Limonciello@xxxxxxx>; Kazlauskas, Nicholas > <Nicholas.Kazlauskas@xxxxxxx>; Zhuo, Qingqing (Lillian) > <Qingqing.Zhuo@xxxxxxx>; Scott Bruce <smbruce@xxxxxxxxx>; Chris > Hixon <linux-kernel-bugs@xxxxxxxxxxxxx>; spasswolf@xxxxxx > Subject: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on > DCN20/DCN21 > Importance: High > > The WA from commit 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard > hang > on HPD") and commit 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard > hang on HPD to dcn20") causes a regression in s0ix where the system will > fail to resume properly on many laptops. Pull the workarounds out to > avoid that s0ix regression in the common case. This HPD hang happens with > an external device and a new W/A will need to be developed for this in the > future. > > Cc: Kazlauskas Nicholas <Nicholas.Kazlauskas@xxxxxxx> > Cc: Qingqing Zhuo <qingqing.zhuo@xxxxxxx> > Reported-by: Scott Bruce <smbruce@xxxxxxxxx> > Reported-by: Chris Hixon <linux-kernel-bugs@xxxxxxxxxxxxx> > Reported-by: spasswolf@xxxxxx > Link: https://bugzilla.kernel.org/show_bug.cgi?id=215436 > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1821 > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1852 > Fixes: 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard hang on HPD") > Fixes: 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard hang on HPD to > dcn20") > Signed-off-by: Mario Limonciello <mario.limonciello@xxxxxxx> I think the revert is fine once we figure out where we're missing calls to: .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, These are already part of dc_link_detect, so I suspect there's another interface in DC that should be using these. I think the best way to debug this is to revert the patch locally and add a stack dump when DMCUB hangs our times out. That way you can know where the PHY was trying to be accessed without the refclk being on. We had a similar issue in DCN31 which didn't require a W/A like DCN21. I'd like to hold off on merging this until that hang is verified as gone. Regards, Nicholas Kazlauskas > --- > .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 11 +------- > .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 11 +------- > .../display/dc/irq/dcn20/irq_service_dcn20.c | 25 ------------------- > .../display/dc/irq/dcn20/irq_service_dcn20.h | 2 -- > .../display/dc/irq/dcn21/irq_service_dcn21.c | 25 ------------------- > .../display/dc/irq/dcn21/irq_service_dcn21.h | 2 -- > .../gpu/drm/amd/display/dc/irq/irq_service.c | 2 +- > .../gpu/drm/amd/display/dc/irq/irq_service.h | 4 --- > 8 files changed, 3 insertions(+), 79 deletions(-) > > diff --git > a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c > b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c > index 9f35f2e8f971..cac80ba69072 100644 > --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c > +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c > @@ -38,7 +38,6 @@ > #include "clk/clk_11_0_0_offset.h" > #include "clk/clk_11_0_0_sh_mask.h" > > -#include "irq/dcn20/irq_service_dcn20.h" > > #undef FN > #define FN(reg_name, field_name) \ > @@ -223,8 +222,6 @@ void dcn2_update_clocks(struct clk_mgr > *clk_mgr_base, > bool force_reset = false; > bool p_state_change_support; > int total_plane_count; > - int irq_src; > - uint32_t hpd_state; > > if (dc->work_arounds.skip_clock_update) > return; > @@ -242,13 +239,7 @@ void dcn2_update_clocks(struct clk_mgr > *clk_mgr_base, > if (dc->res_pool->pp_smu) > pp_smu = &dc->res_pool->pp_smu->nv_funcs; > > - for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= > DC_IRQ_SOURCE_HPD6; irq_src++) { > - hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs, > irq_src); > - if (hpd_state) > - break; > - } > - > - if (display_count == 0 && !hpd_state) > + if (display_count == 0) > enter_display_off = true; > > if (enter_display_off == safe_to_lower) { > diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c > b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c > index fbda42313bfe..f4dee0e48a67 100644 > --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c > +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c > @@ -42,7 +42,6 @@ > #include "clk/clk_10_0_2_sh_mask.h" > #include "renoir_ip_offset.h" > > -#include "irq/dcn21/irq_service_dcn21.h" > > /* Constants */ > > @@ -129,11 +128,9 @@ static void rn_update_clocks(struct clk_mgr > *clk_mgr_base, > struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; > struct dc *dc = clk_mgr_base->ctx->dc; > int display_count; > - int irq_src; > bool update_dppclk = false; > bool update_dispclk = false; > bool dpp_clock_lowered = false; > - uint32_t hpd_state; > > struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; > > @@ -150,14 +147,8 @@ static void rn_update_clocks(struct clk_mgr > *clk_mgr_base, > > display_count = rn_get_active_display_cnt_wa(dc, > context); > > - for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= > DC_IRQ_SOURCE_HPD5; irq_src++) { > - hpd_state = dc_get_hpd_state_dcn21(dc- > >res_pool->irqs, irq_src); > - if (hpd_state) > - break; > - } > - > /* if we can go lower, go lower */ > - if (display_count == 0 && !hpd_state) { > + if (display_count == 0) { > > rn_vbios_smu_set_dcn_low_power_state(clk_mgr, > DCN_PWR_STATE_LOW_POWER); > /* update power state */ > clk_mgr_base->clks.pwr_state = > DCN_PWR_STATE_LOW_POWER; > diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c > b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c > index 9ccafe007b23..c4b067d01895 100644 > --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c > +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c > @@ -132,31 +132,6 @@ enum dc_irq_source to_dal_irq_source_dcn20( > } > } > > -uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum > dc_irq_source source) > -{ > - const struct irq_source_info *info; > - uint32_t addr; > - uint32_t value; > - uint32_t current_status; > - > - info = find_irq_source_info(irq_service, source); > - if (!info) > - return 0; > - > - addr = info->status_reg; > - if (!addr) > - return 0; > - > - value = dm_read_reg(irq_service->ctx, addr); > - current_status = > - get_reg_field_value( > - value, > - HPD0_DC_HPD_INT_STATUS, > - DC_HPD_SENSE); > - > - return current_status; > -} > - > static bool hpd_ack( > struct irq_service *irq_service, > const struct irq_source_info *info) > diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h > b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h > index 4d69ab24ca25..aee4b37999f1 100644 > --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h > +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h > @@ -31,6 +31,4 @@ > struct irq_service *dal_irq_service_dcn20_create( > struct irq_service_init_data *init_data); > > -uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum > dc_irq_source source); > - > #endif > diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c > b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c > index 235294534c43..0f15bcada4e9 100644 > --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c > +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c > @@ -134,31 +134,6 @@ static enum dc_irq_source > to_dal_irq_source_dcn21(struct irq_service *irq_servic > return DC_IRQ_SOURCE_INVALID; > } > > -uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum > dc_irq_source source) > -{ > - const struct irq_source_info *info; > - uint32_t addr; > - uint32_t value; > - uint32_t current_status; > - > - info = find_irq_source_info(irq_service, source); > - if (!info) > - return 0; > - > - addr = info->status_reg; > - if (!addr) > - return 0; > - > - value = dm_read_reg(irq_service->ctx, addr); > - current_status = > - get_reg_field_value( > - value, > - HPD0_DC_HPD_INT_STATUS, > - DC_HPD_SENSE); > - > - return current_status; > -} > - > static bool hpd_ack( > struct irq_service *irq_service, > const struct irq_source_info *info) > diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h > b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h > index 616470e32380..da2bd0e93d7a 100644 > --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h > +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h > @@ -31,6 +31,4 @@ > struct irq_service *dal_irq_service_dcn21_create( > struct irq_service_init_data *init_data); > > -uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum > dc_irq_source source); > - > #endif > diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c > b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c > index 4db1133e4466..a2a4fbeb83f8 100644 > --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c > +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c > @@ -79,7 +79,7 @@ void dal_irq_service_destroy(struct irq_service > **irq_service) > *irq_service = NULL; > } > > -const struct irq_source_info *find_irq_source_info( > +static const struct irq_source_info *find_irq_source_info( > struct irq_service *irq_service, > enum dc_irq_source source) > { > diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h > b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h > index e60b82480093..dbfcb096eedd 100644 > --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h > +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h > @@ -69,10 +69,6 @@ struct irq_service { > const struct irq_service_funcs *funcs; > }; > > -const struct irq_source_info *find_irq_source_info( > - struct irq_service *irq_service, > - enum dc_irq_source source); > - > void dal_irq_service_construct( > struct irq_service *irq_service, > struct irq_service_init_data *init_data); > -- > 2.25.1