[Public] Thanks, Lyude! Comments inline. > -----Original Message----- > From: Lyude Paul <lyude@xxxxxxxxxx> > Sent: Saturday, November 5, 2022 7:59 AM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Wentland, Harry <Harry.Wentland@xxxxxxx>; stable@xxxxxxxxxxxxxxx; > Li, Sun peng (Leo) <Sunpeng.Li@xxxxxxx>; Siqueira, Rodrigo > <Rodrigo.Siqueira@xxxxxxx>; Deucher, Alexander > <Alexander.Deucher@xxxxxxx>; Koenig, Christian > <Christian.Koenig@xxxxxxx>; Pan, Xinhui <Xinhui.Pan@xxxxxxx>; David > Airlie <airlied@xxxxxxxxx>; Daniel Vetter <daniel@xxxxxxxx>; Kazlauskas, > Nicholas <Nicholas.Kazlauskas@xxxxxxx>; Pillai, Aurabindo > <Aurabindo.Pillai@xxxxxxx>; Li, Roman <Roman.Li@xxxxxxx>; Zuo, Jerry > <Jerry.Zuo@xxxxxxx>; Wu, Hersen <hersenxs.wu@xxxxxxx>; Lin, Wayne > <Wayne.Lin@xxxxxxx>; Thomas Zimmermann <tzimmermann@xxxxxxx>; > Mahfooz, Hamza <Hamza.Mahfooz@xxxxxxx>; Hung, Alex > <Alex.Hung@xxxxxxx>; Francis, David <David.Francis@xxxxxxx>; Mikita > Lipski <mikita.lipski@xxxxxxx>; Liu, Wenjing <Wenjing.Liu@xxxxxxx>; > open list:DRM DRIVERS <dri-devel@xxxxxxxxxxxxxxxxxxxxx>; open list <linux- > kernel@xxxxxxxxxxxxxxx> > Subject: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and > deadlocking > > It appears that amdgpu makes the mistake of completely ignoring the return > values from the DP MST helpers, and instead just returns a simple true/false. > In this case, it seems to have come back to bite us because as a result of > simply returning false from compute_mst_dsc_configs_for_state(), amdgpu > had no way of telling when a deadlock happened from these helpers. This > could definitely result in some kernel splats. > > Signed-off-by: Lyude Paul <lyude@xxxxxxxxxx> > Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share") > Cc: Harry Wentland <harry.wentland@xxxxxxx> > Cc: <stable@xxxxxxxxxxxxxxx> # v5.6+ > --- > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 18 +-- > .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 107 ++++++++++------ > -- > .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 12 +- > 3 files changed, 73 insertions(+), 64 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index 0db2a88cd4d7b..6f76b2c84cdb5 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -6462,7 +6462,7 @@ static int > dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, > struct drm_connector_state *new_con_state; > struct amdgpu_dm_connector *aconnector; > struct dm_connector_state *dm_conn_state; > - int i, j; > + int i, j, ret; > int vcpi, pbn_div, pbn, slot_num = 0; > > for_each_new_connector_in_state(state, connector, > new_con_state, i) { @@ -6509,8 +6509,11 @@ static int > dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, > dm_conn_state->pbn = pbn; > dm_conn_state->vcpi_slots = slot_num; > > - drm_dp_mst_atomic_enable_dsc(state, aconnector- > >port, dm_conn_state->pbn, > - false); > + ret = drm_dp_mst_atomic_enable_dsc(state, > aconnector->port, > + dm_conn_state- > >pbn, false); > + if (ret != 0) > + return ret; > + > continue; > } > > @@ -9523,10 +9526,9 @@ static int amdgpu_dm_atomic_check(struct > drm_device *dev, > > #if defined(CONFIG_DRM_AMD_DC_DCN) > if (dc_resource_is_dsc_encoding_supported(dc)) { > - if (!pre_validate_dsc(state, &dm_state, vars)) { > - ret = -EINVAL; > + ret = pre_validate_dsc(state, &dm_state, vars); > + if (ret != 0) > goto fail; > - } > } > #endif > > @@ -9621,9 +9623,9 @@ static int amdgpu_dm_atomic_check(struct > drm_device *dev, > } > > #if defined(CONFIG_DRM_AMD_DC_DCN) > - if (!compute_mst_dsc_configs_for_state(state, dm_state- > >context, vars)) { > + ret = compute_mst_dsc_configs_for_state(state, dm_state- > >context, vars); > + if (ret) { > > DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() > failed\n"); > - ret = -EINVAL; > goto fail; > } > > diff --git > a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > index 6ff96b4bdda5c..30bc2e5058b70 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > +++ > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > @@ -864,25 +864,25 @@ static bool try_disable_dsc(struct > drm_atomic_state *state, > return true; > } > > -static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state > *state, > - struct dc_state *dc_state, > - struct dc_link *dc_link, > - struct dsc_mst_fairness_vars *vars, > - struct drm_dp_mst_topology_mgr > *mgr, > - int *link_vars_start_index) > +static int compute_mst_dsc_configs_for_link(struct drm_atomic_state > *state, > + struct dc_state *dc_state, > + struct dc_link *dc_link, > + struct dsc_mst_fairness_vars *vars, > + struct drm_dp_mst_topology_mgr > *mgr, > + int *link_vars_start_index) > { > struct dc_stream_state *stream; > struct dsc_mst_fairness_params params[MAX_PIPES]; > struct amdgpu_dm_connector *aconnector; > struct drm_dp_mst_topology_state *mst_state = > drm_atomic_get_mst_topology_state(state, mgr); > int count = 0; > - int i, k; > + int i, k, ret; > bool debugfs_overwrite = false; > > memset(params, 0, sizeof(params)); > > if (IS_ERR(mst_state)) > - return false; > + return PTR_ERR(mst_state); > > mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link); #if > defined(CONFIG_DRM_AMD_DC_DCN) @@ -933,7 +933,7 @@ static bool > compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, > > if (count == 0) { > ASSERT(0); > - return true; > + return 0; > } > > /* k is start index of vars for current phy link used by mst hub */ @@ > -949,11 +949,14 @@ static bool compute_mst_dsc_configs_for_link(struct > drm_atomic_state *state, > vars[i + k].bpp_x16 = 0; > if (drm_dp_atomic_find_time_slots(state, params[i].port- > >mgr, params[i].port, > vars[i + k].pbn) < 0) > - return false; > + return -EINVAL; Should we also return the error code get from drm_dp_atomic_find_time_slots() rather than assigning a new one here? > } > - if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) { > + ret = drm_dp_mst_atomic_check(state); > + if (ret == 0 && !debugfs_overwrite) { > set_dsc_configs_from_fairness_vars(params, vars, count, k); > - return true; > + return 0; > + } else if (ret == -EDEADLK) { > + return ret; I think we should return here whenever there is an error. Not just for EDEADLK case. > } > > /* Try max compression */ > @@ -964,29 +967,30 @@ static bool > compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, > vars[i + k].bpp_x16 = > params[i].bw_range.min_target_bpp_x16; > if (drm_dp_atomic_find_time_slots(state, > params[i].port->mgr, > params[i].port, vars[i > + k].pbn) < 0) > - return false; > + return -EINVAL; Same as above. > } else { > vars[i + k].pbn = > kbps_to_peak_pbn(params[i].bw_range.stream_kbps); > vars[i + k].dsc_enabled = false; > vars[i + k].bpp_x16 = 0; > if (drm_dp_atomic_find_time_slots(state, > params[i].port->mgr, > params[i].port, vars[i > + k].pbn) < 0) > - return false; > + return -EINVAL; Same as above. > } > } > - if (drm_dp_mst_atomic_check(state)) > - return false; > + ret = drm_dp_mst_atomic_check(state); > + if (ret != 0) > + return ret; > > /* Optimize degree of compression */ > if (!increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, > k)) > - return false; > + return -ENOSPC; > > if (!try_disable_dsc(state, dc_link, params, vars, count, k)) > - return false; > + return -ENOSPC; > > set_dsc_configs_from_fairness_vars(params, vars, count, k); > > - return true; > + return 0; > } > > static bool is_dsc_need_re_compute( > @@ -1087,15 +1091,16 @@ static bool is_dsc_need_re_compute( > return is_dsc_need_re_compute; > } > > -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, > - struct dc_state *dc_state, > - struct dsc_mst_fairness_vars *vars) > +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, > + struct dc_state *dc_state, > + struct dsc_mst_fairness_vars *vars) > { > int i, j; > struct dc_stream_state *stream; > bool computed_streams[MAX_PIPES]; > struct amdgpu_dm_connector *aconnector; > int link_vars_start_index = 0; > + int ret = 0; > > for (i = 0; i < dc_state->stream_count; i++) > computed_streams[i] = false; > @@ -1118,17 +1123,19 @@ bool compute_mst_dsc_configs_for_state(struct > drm_atomic_state *state, > continue; > > if (dcn20_remove_stream_from_ctx(stream->ctx->dc, > dc_state, stream) != DC_OK) > - return false; > + return -EINVAL; > > if (!is_dsc_need_re_compute(state, dc_state, stream->link)) > continue; > > mutex_lock(&aconnector->mst_mgr.lock); > - if (!compute_mst_dsc_configs_for_link(state, dc_state, > stream->link, vars, > - &aconnector->mst_mgr, > - &link_vars_start_index)) { > + > + ret = compute_mst_dsc_configs_for_link(state, dc_state, > stream->link, vars, > + &aconnector->mst_mgr, > + &link_vars_start_index); > + if (ret != 0) { > mutex_unlock(&aconnector->mst_mgr.lock); > - return false; > + return ret; > } > mutex_unlock(&aconnector->mst_mgr.lock); > > @@ -1143,22 +1150,22 @@ bool compute_mst_dsc_configs_for_state(struct > drm_atomic_state *state, > > if (stream->timing.flags.DSC == 1) > if (dc_stream_add_dsc_to_resource(stream->ctx- > >dc, dc_state, stream) != DC_OK) > - return false; > + return -EINVAL; > } > > - return true; > + return ret; > } > > -static bool > - pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state > *state, > - struct dc_state *dc_state, > - struct dsc_mst_fairness_vars > *vars) > +static int pre_compute_mst_dsc_configs_for_state(struct > drm_atomic_state *state, > + struct dc_state *dc_state, > + struct dsc_mst_fairness_vars > *vars) > { > int i, j; > struct dc_stream_state *stream; > bool computed_streams[MAX_PIPES]; > struct amdgpu_dm_connector *aconnector; > int link_vars_start_index = 0; > + int ret; > > for (i = 0; i < dc_state->stream_count; i++) > computed_streams[i] = false; > @@ -1184,13 +1191,12 @@ static bool > continue; > > mutex_lock(&aconnector->mst_mgr.lock); > - if (!compute_mst_dsc_configs_for_link(state, dc_state, > stream->link, vars, > - &aconnector->mst_mgr, > - &link_vars_start_index)) { > - mutex_unlock(&aconnector->mst_mgr.lock); > - return false; > - } > + ret = compute_mst_dsc_configs_for_link(state, dc_state, > stream->link, vars, > + &aconnector->mst_mgr, > + &link_vars_start_index); > mutex_unlock(&aconnector->mst_mgr.lock); > + if (ret != 0) > + return ret; > > for (j = 0; j < dc_state->stream_count; j++) { > if (dc_state->streams[j]->link == stream->link) @@ - > 1198,7 +1204,7 @@ static bool > } > } > > - return true; > + return ret; > } > > static int find_crtc_index_in_state_by_stream(struct drm_atomic_state > *state, @@ -1253,9 +1259,9 @@ static bool > is_dsc_precompute_needed(struct drm_atomic_state *state) > return ret; > } > > -bool pre_validate_dsc(struct drm_atomic_state *state, > - struct dm_atomic_state **dm_state_ptr, > - struct dsc_mst_fairness_vars *vars) > +int pre_validate_dsc(struct drm_atomic_state *state, > + struct dm_atomic_state **dm_state_ptr, > + struct dsc_mst_fairness_vars *vars) > { > int i; > struct dm_atomic_state *dm_state; > @@ -1264,11 +1270,12 @@ bool pre_validate_dsc(struct drm_atomic_state > *state, > > if (!is_dsc_precompute_needed(state)) { > DRM_INFO_ONCE("DSC precompute is not needed.\n"); > - return true; > + return 0; > } > - if (dm_atomic_get_state(state, dm_state_ptr)) { > + ret = dm_atomic_get_state(state, dm_state_ptr); > + if (ret != 0) { > DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); > - return false; > + return ret; > } > dm_state = *dm_state_ptr; > > @@ -1280,7 +1287,7 @@ bool pre_validate_dsc(struct drm_atomic_state > *state, > > local_dc_state = kmemdup(dm_state->context, sizeof(struct > dc_state), GFP_KERNEL); > if (!local_dc_state) > - return false; > + return -ENOMEM; > > for (i = 0; i < local_dc_state->stream_count; i++) { > struct dc_stream_state *stream = dm_state->context- > >streams[i]; @@ -1316,9 +1323,9 @@ bool pre_validate_dsc(struct > drm_atomic_state *state, > if (ret != 0) > goto clean_exit; > > - if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, > vars)) { > + ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, > vars); > + if (ret != 0) { > > DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() > failed\n"); > - ret = -EINVAL; > goto clean_exit; > } > > @@ -1349,7 +1356,7 @@ bool pre_validate_dsc(struct drm_atomic_state > *state, > > kfree(local_dc_state); > > - return (ret == 0); > + return ret; > } > > static unsigned int kbps_from_pbn(unsigned int pbn) diff --git > a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h > index b92a7c5671aa2..97fd70df531bf 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h > +++ > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h > @@ -53,15 +53,15 @@ struct dsc_mst_fairness_vars { > struct amdgpu_dm_connector *aconnector; }; > > -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, > - struct dc_state *dc_state, > - struct dsc_mst_fairness_vars *vars); > +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, > + struct dc_state *dc_state, > + struct dsc_mst_fairness_vars *vars); > > bool needs_dsc_aux_workaround(struct dc_link *link); > > -bool pre_validate_dsc(struct drm_atomic_state *state, > - struct dm_atomic_state **dm_state_ptr, > - struct dsc_mst_fairness_vars *vars); > +int pre_validate_dsc(struct drm_atomic_state *state, > + struct dm_atomic_state **dm_state_ptr, > + struct dsc_mst_fairness_vars *vars); > > enum dc_status dm_dp_mst_is_port_support_mode( > struct amdgpu_dm_connector *aconnector, > -- > 2.37.3 --- Regards, Wayne Lin