[Public] > -----Original Message----- > From: Imre Deak <imre.deak@xxxxxxxxx> > Sent: Saturday, March 8, 2025 2:32 AM > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx; intel-xe@xxxxxxxxxxxxxxxxxxxxx; dri- > devel@xxxxxxxxxxxxxxxxxxxxx > Cc: Lin, Wayne <Wayne.Lin@xxxxxxx>; Lyude Paul <lyude@xxxxxxxxxx>; > stable@xxxxxxxxxxxxxxx > Subject: [PATCH] drm/dp_mst: Fix locking when skipping CSN before topology > probing > > The handling of the MST Connection Status Notify message is skipped if the probing > of the topology is still pending. Acquiring the drm_dp_mst_topology_mgr::probe_lock > for this in > drm_dp_mst_handle_up_req() is problematic: the task/work this function is called > from is also responsible for handling MST down-request replies (in > drm_dp_mst_handle_down_rep()). Thus drm_dp_mst_link_probe_work() - holding > already probe_lock - could be blocked waiting for an MST down-request reply while > drm_dp_mst_handle_up_req() is waiting for probe_lock while processing a CSN > message. This leads to the probe work's down-request message timing out. > > A scenario similar to the above leading to a down-request timeout is handling a CSN > message in drm_dp_mst_handle_conn_stat(), holding the probe_lock and sending > down-request messages while a second CSN message sent by the sink > subsequently is handled by drm_dp_mst_handle_up_req(). > > Fix the above by moving the logic to skip the CSN handling to > drm_dp_mst_process_up_req(). This function is called from a work (separate from > the task/work handling new up/down messages), already holding probe_lock. This > solves the above timeout issue, since handling of down-request replies won't be > blocked by probe_lock. > > Fixes: ddf983488c3e ("drm/dp_mst: Skip CSN if topology probing is not done yet") > Cc: Wayne Lin <Wayne.Lin@xxxxxxx> > Cc: Lyude Paul <lyude@xxxxxxxxxx> > Cc: stable@xxxxxxxxxxxxxxx # v6.6+ > Signed-off-by: Imre Deak <imre.deak@xxxxxxxxx> > --- > drivers/gpu/drm/display/drm_dp_mst_topology.c | 40 +++++++++++-------- > 1 file changed, 24 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c > b/drivers/gpu/drm/display/drm_dp_mst_topology.c > index 8b68bb3fbffb0..3a1f1ffc7b552 100644 > --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c > +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c > @@ -4036,6 +4036,22 @@ static int drm_dp_mst_handle_down_rep(struct > drm_dp_mst_topology_mgr *mgr) > return 0; > } > > +static bool primary_mstb_probing_is_done(struct drm_dp_mst_topology_mgr > +*mgr) { > + bool probing_done = false; > + > + mutex_lock(&mgr->lock); Thanks for catching this, Imre! Here I think using mgr->lock is not sufficient for determining probing is done or not by mst_primary->link_address_sent. Since it might still be probing the rest of the topology with mst_primary probed. Use probe_lock instead? Thanks! > + > + if (mgr->mst_primary && drm_dp_mst_topology_try_get_mstb(mgr- > >mst_primary)) { > + probing_done = mgr->mst_primary->link_address_sent; > + drm_dp_mst_topology_put_mstb(mgr->mst_primary); > + } > + > + mutex_unlock(&mgr->lock); > + > + return probing_done; > +} > + > static inline bool > drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, > struct drm_dp_pending_up_req *up_req) @@ -4066,8 > +4082,12 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr > *mgr, > > /* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY > events */ > if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) { > - dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); > - hotplug = true; > + if (!primary_mstb_probing_is_done(mgr)) { > + drm_dbg_kms(mgr->dev, "Got CSN before finish topology > probing. Skip it.\n"); > + } else { > + dowork = drm_dp_mst_handle_conn_stat(mstb, &msg- > >u.conn_stat); > + hotplug = true; > + } > } > > drm_dp_mst_topology_put_mstb(mstb); > @@ -4149,10 +4169,11 @@ static int drm_dp_mst_handle_up_req(struct > drm_dp_mst_topology_mgr *mgr) > drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type, > false); > > + drm_dp_mst_topology_put_mstb(mst_primary); > + > if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { > const struct drm_dp_connection_status_notify *conn_stat = > &up_req->msg.u.conn_stat; > - bool handle_csn; > > drm_dbg_kms(mgr->dev, "Got CSN: pn: %d ldps:%d ddps: %d mcs: > %d ip: %d pdt: %d\n", > conn_stat->port_number, > @@ -4161,16 +4182,6 @@ static int drm_dp_mst_handle_up_req(struct > drm_dp_mst_topology_mgr *mgr) > conn_stat->message_capability_status, > conn_stat->input_port, > conn_stat->peer_device_type); > - > - mutex_lock(&mgr->probe_lock); > - handle_csn = mst_primary->link_address_sent; > - mutex_unlock(&mgr->probe_lock); > - > - if (!handle_csn) { > - drm_dbg_kms(mgr->dev, "Got CSN before finish topology > probing. Skip it."); > - kfree(up_req); > - goto out_put_primary; > - } > } else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { > const struct drm_dp_resource_status_notify *res_stat = > &up_req->msg.u.resource_stat; > @@ -4185,9 +4196,6 @@ static int drm_dp_mst_handle_up_req(struct > drm_dp_mst_topology_mgr *mgr) > list_add_tail(&up_req->next, &mgr->up_req_list); > mutex_unlock(&mgr->up_req_lock); > queue_work(system_long_wq, &mgr->up_req_work); > - > -out_put_primary: > - drm_dp_mst_topology_put_mstb(mst_primary); > out_clear_reply: > reset_msg_rx_state(&mgr->up_req_recv); > return ret; > -- > 2.44.2 -- Regards, Wayne Lin