[Public]

> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@xxxxxxx>
> Sent: Thursday, February 13, 2025 1:35 AM
> To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Subject: Re: [PATCH] drm/amdgpu: simplify xgmi peer info calls
>
> On 2/12/2025 9:27 PM, Jonathan Kim wrote:
> > Deprecate KFD XGMI peer info calls in favour of calling directly from
> > simplified XGMI peer info functions.
> >
> > Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 42 ------------------
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  5 ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 51 +++++++++++++++++-----
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  6 +--
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c      | 11 +++--
> >  5 files changed, 48 insertions(+), 67 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index 0312231b703e..4cec3a873995 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -555,48 +555,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
> >  	return r;
> >  }
> >
> > -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
> > -					  struct amdgpu_device *src)
> > -{
> > -	struct amdgpu_device *peer_adev = src;
> > -	struct amdgpu_device *adev = dst;
> > -	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
> > -
> > -	if (ret < 0) {
> > -		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
> > -			  adev->gmc.xgmi.physical_node_id,
> > -			  peer_adev->gmc.xgmi.physical_node_id, ret);
> > -		ret = 0;
> > -	}
> > -	return (uint8_t)ret;
> > -}
> > -
> > -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
> > -					    struct amdgpu_device *src,
> > -					    bool is_min)
> > -{
> > -	struct amdgpu_device *adev = dst, *peer_adev;
> > -	int num_links;
> > -
> > -	if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))
> > -		return 0;
> > -
> > -	if (src)
> > -		peer_adev = src;
> > -
> > -	/* num links returns 0 for indirect peers since indirect route is unknown. */
> > -	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
> > -	if (num_links < 0) {
> > -		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
> > -			  adev->gmc.xgmi.physical_node_id,
> > -			  peer_adev->gmc.xgmi.physical_node_id, num_links);
> > -		num_links = 0;
> > -	}
> > -
> > -	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
> > -	return (num_links * 16 * 25000)/BITS_PER_BYTE;
> > -}
> > -
> >  int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
> >  {
> >  	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 092dbd8bec97..28eb1cd0eb5a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -255,11 +255,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
> >  				  uint64_t *bo_size, void *metadata_buffer,
> >  				  size_t buffer_size, uint32_t *metadata_size,
> >  				  uint32_t *flags, int8_t *xcp_id);
> > -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
> > -					  struct amdgpu_device *src);
> > -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
> > -					    struct amdgpu_device *src,
> > -					    bool is_min);
> >  int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
> >  int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
> >  					uint32_t *payload);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > index 74b4349e345a..d18d2a26cc91 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > @@ -818,28 +818,59 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
> >   * num_hops[2:0] = number of hops
> >   */
> >  int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> > -		struct amdgpu_device *peer_adev)
> > +			       struct amdgpu_device *peer_adev)
> >  {
> >  	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> >  	uint8_t num_hops_mask = 0x7;
> >  	int i;
> >
> > +	if (!adev->gmc.xgmi.supported)
> > +		return 0;
> > +
> >  	for (i = 0 ; i < top->num_nodes; ++i)
> >  		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> >  			return top->nodes[i].num_hops & num_hops_mask;
> > -	return -EINVAL;
> > +
> > +	DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d.\n",
>
> Suggest using a dev_ function to identify the device by PCI number. Since
> the function still passes, maybe info level is good enough.

Ack'd. Will change.
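For reference, the respin would use something along these lines (untested sketch; exact message and log level still to be settled):

	dev_info(adev->dev,
		 "failed to get xgmi hops count between node %d and %d\n",
		 adev->gmc.xgmi.physical_node_id,
		 peer_adev->gmc.xgmi.physical_node_id);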
> > +		  adev->gmc.xgmi.physical_node_id,
> > +		  peer_adev->gmc.xgmi.physical_node_id);
> > +
> > +	return 0;
> >  }
> >
> > -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> > -		struct amdgpu_device *peer_adev)
> > +int amdgpu_xgmi_get_bandwidth_mbytes(struct amdgpu_device *adev,
> > +				     struct amdgpu_device *peer_adev)
> >  {
> > -	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> > -	int i;
> > +	int num_links = !peer_adev ? 1 : 0;
> >
> > -	for (i = 0 ; i < top->num_nodes; ++i)
> > -		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> > -			return top->nodes[i].num_links;
> > -	return -EINVAL;
> > +	if (!adev->gmc.xgmi.supported)
> > +		return 0;
> > +
> > +	if (peer_adev) {
> > +		struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> > +		int i;
> > +
> > +		for (i = 0 ; i < top->num_nodes; ++i) {
> > +			if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id)
> > +				continue;
> > +
> > +			num_links = top->nodes[i].num_links;
> > +			break;
> > +		}
> > +	}
> > +
> > +	/* num links returns 0 for indirect peers since indirect route is unknown. */
> > +	if (!num_links) {
>
> This looks problematic. I guess it is better to keep the old way of
> passing min/max. Otherwise, there is a chance that min reports some
> value and max could report this error.

I don't think this is a problem.
The old way of passing is_min == true was tied to peer_adev == NULL, which made it a redundant argument. is_min == false doesn't prevent the issue you've mentioned from happening either, because the old code effectively sets num_links = 0 if the peer_adev search fails anyway.

With the line at the top of the proposed function:

> > +	int num_links = !peer_adev ? 1 : 0;

the semantics are as follows:
- A NULL peer_adev indicates that the caller doesn't want peer-to-peer data to factor num_links into the bandwidth report, so a single XGMI link is assumed in the bandwidth calculation.
- If the peer_adev search fails and num_links ends up 0, that means the caller passed in an invalid node (i.e. one that hasn't been registered to the hive).

get_hops_count (both old and proposed) operates in the same fashion, so the functionality of the proposed change and the old code should remain the same.

I'm open to adjusting get_hops_count & get_bandwidth_mbytes to return either -ENODEV (device not found in hive) or -EINVAL (bad peer_adev request) on a failed search. That would require the KFD CRAT logic to propagate the error itself. However, it would also change the behaviour on a bandwidth reporting error from ROCm still functioning to the KFD not loading at all. Currently, the consequence of passing an incorrect peer_adev to the bandwidth call is that the ROCm runtime doesn't issue an SDMA ganged copy, i.e. a potential peer-to-peer performance drop.
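To make that concrete, an error-returning variant of the tail of the bandwidth helper could look like the following (illustrative sketch only; not what this patch does, and the error code choice is still open):

	/* hypothetical variant: report a failed peer search to the caller */
	if (!num_links) {
		dev_info(adev->dev,
			 "xgmi num links lookup failed between node %d and %d\n",
			 adev->gmc.xgmi.physical_node_id,
			 peer_adev->gmc.xgmi.physical_node_id);
		return -ENODEV;	/* or -EINVAL for a bad peer_adev request */
	}

	/*
	 * With the assumed x16 lanes at 25 Gbps per link, a single link
	 * reports (1 * 16 * 25000) Mbps / 8 = 50000 MB/s.
	 */
	return (num_links * 16 * 25000)/BITS_PER_BYTE;

kfd_fill_gpu_xgmi_link_to_gpu() would then have to check for and propagate the negative return instead of filling in 0 bandwidth.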
Jon

>
> Thanks,
> Lijo
>
> > +		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d.\n",
> > +			  adev->gmc.xgmi.physical_node_id,
> > +			  peer_adev->gmc.xgmi.physical_node_id);
> > +	}
> > +
> > +	/*
> > +	 * TBD - will update IP based bandwidth later.
> > +	 * Bandwidth currently assumed to be x16 lanes x 25Gbps.
> > +	 */
> > +	return (num_links * 16 * 25000)/BITS_PER_BYTE;
> >  }
> >
> >  bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> > index d1282b4c6348..325e7972469d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> > @@ -62,10 +62,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
> >  int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
> >  int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
> >  int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
> > -int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> > -		struct amdgpu_device *peer_adev);
> > -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> > -		struct amdgpu_device *peer_adev);
> > +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
> > +int amdgpu_xgmi_get_bandwidth_mbytes(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
> >  bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
> >  					struct amdgpu_device *peer_adev);
> >  uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > index 70b3ae0b74fe..a787d192390c 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > @@ -2133,8 +2133,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
> >  	bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
> >  	int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
> >  					KFD_CRAT_INTRA_SOCKET_WEIGHT;
> > -	uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
> > -							kdev->adev, NULL, true) : mem_bw;
> > +	uint32_t bandwidth = ext_cpu ? amdgpu_xgmi_get_bandwidth_mbytes(kdev->adev, NULL) :
> > +				       mem_bw;
> >
> >  	/*
> >  	 * with host gpu xgmi link, host can access gpu memory whether
> > @@ -2198,12 +2198,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
> >
> >  	if (use_ta_info) {
> >  		sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
> > -			amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
> > +			amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev);
> >  		sub_type_hdr->maximum_bandwidth_mbs =
> > -			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
> > -							peer_kdev->adev, false);
> > +			amdgpu_xgmi_get_bandwidth_mbytes(kdev->adev, peer_kdev->adev);
> >  		sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
> > -			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
> > +			amdgpu_xgmi_get_bandwidth_mbytes(kdev->adev, NULL) : 0;
> >  	} else {
> >  		bool is_single_hop = kdev->kfd == peer_kdev->kfd;
> >  		int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :