[Public] > -----Original Message----- > From: Chander, Vignesh <Vignesh.Chander@xxxxxxx> > Sent: Thursday, December 7, 2023 7:42 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Luo, Zhigang > <Zhigang.Luo@xxxxxxx>; Kim, Jonathan <Jonathan.Kim@xxxxxxx>; > Chander, Vignesh <Vignesh.Chander@xxxxxxx> > Subject: [PATCH] drm/amdgpu: xgmi_fill_topology_info > > 1. Use the mirrored topology info to fill links for VF. > The new solution is required to simplify and optimize host driver logic. > Only use the new solution for VFs that support full duplex and > extended_peer_link_info; otherwise the info would be incomplete. > > 2. Avoid calling extended_link_info on VF as it's not supported. > > Signed-off-by: Vignesh Chander <Vignesh.Chander@xxxxxxx> Reviewed-by: Jonathan Kim <jonathan.kim@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 58 > ++++++++++++++++++++---- > 2 files changed, 52 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > index a21045d018f2..1bf975b8d083 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > @@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct > psp_context *psp, > get_extended_data) || > amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == > IP_VERSION(13, 0, 6); > - bool ta_port_num_support = psp- > >xgmi_context.xgmi_ta_caps & > - > EXTEND_PEER_LINK_INFO_CMD_FLAG; > + bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? > 0 : > + psp->xgmi_context.xgmi_ta_caps & > EXTEND_PEER_LINK_INFO_CMD_FLAG; > > /* popluate the shared output buffer rather than the cmd > input buffer > * with node_ids as the input for GET_PEER_LINKS command > execution. 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > index 44d8c1a11e1b..dd82d73daed6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > @@ -823,6 +823,28 @@ static int > amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf > return 0; > } > > +void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev, > + struct amdgpu_device *peer_adev) > +{ > + struct psp_xgmi_topology_info *top_info = &adev- > >psp.xgmi_context.top_info; > + struct psp_xgmi_topology_info *peer_info = &peer_adev- > >psp.xgmi_context.top_info; > + > + for (int i = 0; i < peer_info->num_nodes; i++) { > + if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) > { > + for (int j = 0; j < top_info->num_nodes; j++) { > + if (top_info->nodes[j].node_id == peer_adev- > >gmc.xgmi.node_id) { > + peer_info->nodes[i].num_hops = > top_info->nodes[j].num_hops; > + peer_info- > >nodes[i].is_sharing_enabled = > + top_info- > >nodes[j].is_sharing_enabled; > + peer_info->nodes[i].num_links = > + top_info- > >nodes[j].num_links; > + return; > + } > + } > + } > + } > +} > + > int amdgpu_xgmi_add_device(struct amdgpu_device *adev) > { > struct psp_xgmi_topology_info *top_info; > @@ -897,18 +919,38 @@ int amdgpu_xgmi_add_device(struct > amdgpu_device *adev) > goto exit_unlock; > } > > - /* get latest topology info for each device from psp */ > - list_for_each_entry(tmp_adev, &hive->device_list, > gmc.xgmi.head) { > - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, > count, > - &tmp_adev- > >psp.xgmi_context.top_info, false); > + if (amdgpu_sriov_vf(adev) && > + psp->xgmi_context.xgmi_ta_caps & > EXTEND_PEER_LINK_INFO_CMD_FLAG) { > + /* only get topology for VF being init if it can > support full duplex */ > + ret = psp_xgmi_get_topology_info(&adev->psp, > count, > + &adev- > >psp.xgmi_context.top_info, false); > if (ret) { > - dev_err(tmp_adev->dev, > + dev_err(adev->dev, > "XGMI: Get 
topology failure on > device %llx, hive %llx, ret %d", > - tmp_adev->gmc.xgmi.node_id, > - tmp_adev->gmc.xgmi.hive_id, ret); > - /* To do : continue with some node failed or > disable the whole hive */ > + adev->gmc.xgmi.node_id, > + adev->gmc.xgmi.hive_id, ret); > + /* To do: continue with some node failed or > disable the whole hive*/ > goto exit_unlock; > } > + > + /* fill the topology info for peers instead of getting > from PSP */ > + list_for_each_entry(tmp_adev, &hive->device_list, > gmc.xgmi.head) { > + amdgpu_xgmi_fill_topology_info(adev, > tmp_adev); > + } > + } else { > + /* get latest topology info for each device from psp > */ > + list_for_each_entry(tmp_adev, &hive->device_list, > gmc.xgmi.head) { > + ret = > psp_xgmi_get_topology_info(&tmp_adev->psp, count, > + &tmp_adev- > >psp.xgmi_context.top_info, false); > + if (ret) { > + dev_err(tmp_adev->dev, > + "XGMI: Get topology failure > on device %llx, hive %llx, ret %d", > + tmp_adev- > >gmc.xgmi.node_id, > + tmp_adev- > >gmc.xgmi.hive_id, ret); > + /* To do : continue with some node > failed or disable the whole hive */ > + goto exit_unlock; > + } > + } > } > > /* get topology again for hives that support extended data > */ > > base-commit: 44cb338138f7670ce2e1f8b9ef14e32c6ace282c > -- > 2.25.1