On 2/21/2025 11:53 PM, Jonathan Kim wrote: > Deprecate KFD XGMI peer info calls in favour of calling directly from > simplified XGMI peer info functions. > > v2: generalize bandwidth interface to return range in one call > > Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 42 ---------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 5 -- > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 58 ++++++++++++++++++---- > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 24 +++++++-- > drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 27 ++++++---- > 5 files changed, 84 insertions(+), 72 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > index 0312231b703e..4cec3a873995 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > @@ -555,48 +555,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, > return r; > } > > -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, > - struct amdgpu_device *src) > -{ > - struct amdgpu_device *peer_adev = src; > - struct amdgpu_device *adev = dst; > - int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); > - > - if (ret < 0) { > - DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", > - adev->gmc.xgmi.physical_node_id, > - peer_adev->gmc.xgmi.physical_node_id, ret); > - ret = 0; > - } > - return (uint8_t)ret; > -} > - > -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, > - struct amdgpu_device *src, > - bool is_min) > -{ > - struct amdgpu_device *adev = dst, *peer_adev; > - int num_links; > - > - if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) > - return 0; > - > - if (src) > - peer_adev = src; > - > - /* num links returns 0 for indirect peers since indirect route is unknown. */ > - num_links = is_min ? 
1 : amdgpu_xgmi_get_num_links(adev, peer_adev); > - if (num_links < 0) { > - DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n", > - adev->gmc.xgmi.physical_node_id, > - peer_adev->gmc.xgmi.physical_node_id, num_links); > - num_links = 0; > - } > - > - /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */ > - return (num_links * 16 * 25000)/BITS_PER_BYTE; > -} > - > int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min) > { > int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 55d539967695..b6ca41859b53 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -254,11 +254,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, > uint64_t *bo_size, void *metadata_buffer, > size_t buffer_size, uint32_t *metadata_size, > uint32_t *flags, int8_t *xcp_id); > -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, > - struct amdgpu_device *src); > -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, > - struct amdgpu_device *src, > - bool is_min); > int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min); > int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, > uint32_t *payload); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > index c98b6b35cfdf..a4545edfed8e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > @@ -818,28 +818,66 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev > * num_hops[2:0] = number of hops > */ > int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, > - struct amdgpu_device *peer_adev) > + struct amdgpu_device *peer_adev) > { > struct 
psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; > uint8_t num_hops_mask = 0x7; > int i; > > + if (!adev->gmc.xgmi.supported) > + return 0; > + > for (i = 0 ; i < top->num_nodes; ++i) > if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) > return top->nodes[i].num_hops & num_hops_mask; > - return -EINVAL; > + > + dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n", > + peer_adev->gmc.xgmi.physical_node_id); > + > + return 0; > } > > -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, > - struct amdgpu_device *peer_adev) > +int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev, > + enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit, > + uint32_t *min_bw, uint32_t *max_bw) > { > - struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; > - int i; > + bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER; > + int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1; > + int speed = 25, num_lanes = 16, num_links = !peer_mode ? 1 : -1; > > - for (i = 0 ; i < top->num_nodes; ++i) > - if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) > - return top->nodes[i].num_links; > - return -EINVAL; > + > + *min_bw = 0; > + *max_bw = 0; Better to also add a NULL check on the min_bw/max_bw pointers before writing through them. 
With that change, Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx> Thanks, Lijo > + > + if (!adev->gmc.xgmi.supported) > + return -ENODATA; > + > + if (peer_mode && !peer_adev) > + return -EINVAL; > + > + if (peer_mode) { > + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; > + int i; > + > + for (i = 0 ; i < top->num_nodes; ++i) { > + if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id) > + continue; > + > + num_links = top->nodes[i].num_links; > + break; > + } > + } > + > + if (num_links == -1) { > + dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n", > + peer_adev->gmc.xgmi.physical_node_id); > + } else if (num_links) { > + int per_link_bw = (speed * num_lanes * unit_scale)/BITS_PER_BYTE; > + > + *min_bw = per_link_bw; > + *max_bw = num_links * per_link_bw; > + } > + > + return 0; > } > > bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > index d1282b4c6348..924da0bec509 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > @@ -55,6 +55,22 @@ struct amdgpu_pcs_ras_field { > uint32_t pcs_err_shift; > }; > > +/** > + * Bandwidth range reporting comes in two modes. 
> + * > + * PER_LINK - range for any xgmi link > + * PER_PEER - range of max of single xgmi link to max of multiple links based on source peer > + */ > +enum amdgpu_xgmi_bw_mode { > + AMDGPU_XGMI_BW_MODE_PER_LINK = 0, > + AMDGPU_XGMI_BW_MODE_PER_PEER > +}; > + > +enum amdgpu_xgmi_bw_unit { > + AMDGPU_XGMI_BW_UNIT_GBYTES = 0, > + AMDGPU_XGMI_BW_UNIT_MBYTES > +}; > + > extern struct amdgpu_xgmi_ras xgmi_ras; > struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); > void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); > @@ -62,10 +78,10 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev > int amdgpu_xgmi_add_device(struct amdgpu_device *adev); > int amdgpu_xgmi_remove_device(struct amdgpu_device *adev); > int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); > -int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, > - struct amdgpu_device *peer_adev); > -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, > - struct amdgpu_device *peer_adev); > +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); > +int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev, > + enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit, > + uint32_t *min_bw, uint32_t *max_bw); > bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev, > struct amdgpu_device *peer_adev); > uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > index 70b3ae0b74fe..4a7180b46b71 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > @@ -2133,9 +2133,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, > bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3); > int mem_bw = 819200, weight = ext_cpu ? 
KFD_CRAT_XGMI_WEIGHT : > KFD_CRAT_INTRA_SOCKET_WEIGHT; > - uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( > - kdev->adev, NULL, true) : mem_bw; > - > /* > * with host gpu xgmi link, host can access gpu memory whether > * or not pcie bar type is large, so always create bidirectional > @@ -2144,8 +2141,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, > sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; > sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; > sub_type_hdr->weight_xgmi = weight; > - sub_type_hdr->minimum_bandwidth_mbs = bandwidth; > - sub_type_hdr->maximum_bandwidth_mbs = bandwidth; > + if (ext_cpu) { > + amdgpu_xgmi_get_bandwidth(kdev->adev, NULL, > + AMDGPU_XGMI_BW_MODE_PER_LINK, > + AMDGPU_XGMI_BW_UNIT_MBYTES, > + &sub_type_hdr->minimum_bandwidth_mbs, > + &sub_type_hdr->maximum_bandwidth_mbs); > + } else { > + sub_type_hdr->minimum_bandwidth_mbs = mem_bw; > + sub_type_hdr->maximum_bandwidth_mbs = mem_bw; > + } > } else { > sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; > sub_type_hdr->minimum_bandwidth_mbs = > @@ -2198,12 +2203,12 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, > > if (use_ta_info) { > sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * > - amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); > - sub_type_hdr->maximum_bandwidth_mbs = > - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, > - peer_kdev->adev, false); > - sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? > - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; > + amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev); > + amdgpu_xgmi_get_bandwidth(kdev->adev, peer_kdev->adev, > + AMDGPU_XGMI_BW_MODE_PER_PEER, > + AMDGPU_XGMI_BW_UNIT_MBYTES, > + &sub_type_hdr->minimum_bandwidth_mbs, > + &sub_type_hdr->maximum_bandwidth_mbs); > } else { > bool is_single_hop = kdev->kfd == peer_kdev->kfd; > int weight = is_single_hop ? 
KFD_CRAT_INTRA_SOCKET_WEIGHT :