[Public]

> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@xxxxxxx>
> Sent: Friday, February 14, 2025 12:58 AM
> To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Subject: Re: [PATCH] drm/amdgpu: simplify xgmi peer info calls
>
>
> On 2/13/2025 9:20 PM, Kim, Jonathan wrote:
> > [Public]
> >
> >> -----Original Message-----
> >> From: Lazar, Lijo <Lijo.Lazar@xxxxxxx>
> >> Sent: Thursday, February 13, 2025 1:35 AM
> >> To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> >> Subject: Re: [PATCH] drm/amdgpu: simplify xgmi peer info calls
> >>
> >>
> >> On 2/12/2025 9:27 PM, Jonathan Kim wrote:
> >>> Deprecate KFD XGMI peer info calls in favour of calling directly from
> >>> simplified XGMI peer info functions.
> >>>
> >>> Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 42 ------------------
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  5 ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 51 +++++++++++++++++-----
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  6 +--
> >>>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c      | 11 +++--
> >>>  5 files changed, 48 insertions(+), 67 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> >>> index 0312231b703e..4cec3a873995 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> >>> @@ -555,48 +555,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
> >>>          return r;
> >>>  }
> >>>
> >>> -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
> >>> -                                          struct amdgpu_device *src)
> >>> -{
> >>> -        struct amdgpu_device *peer_adev = src;
> >>> -        struct amdgpu_device *adev = dst;
> >>> -        int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
> >>> -
> >>> -        if (ret < 0) {
> >>> -                DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
> >>> -                          adev->gmc.xgmi.physical_node_id,
> >>> -                          peer_adev->gmc.xgmi.physical_node_id, ret);
> >>> -                ret = 0;
> >>> -        }
> >>> -        return (uint8_t)ret;
> >>> -}
> >>> -
> >>> -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
> >>> -                                            struct amdgpu_device *src,
> >>> -                                            bool is_min)
> >>> -{
> >>> -        struct amdgpu_device *adev = dst, *peer_adev;
> >>> -        int num_links;
> >>> -
> >>> -        if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))
> >>> -                return 0;
> >>> -
> >>> -        if (src)
> >>> -                peer_adev = src;
> >>> -
> >>> -        /* num links returns 0 for indirect peers since indirect route is unknown. */
> >>> -        num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
> >>> -        if (num_links < 0) {
> >>> -                DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
> >>> -                          adev->gmc.xgmi.physical_node_id,
> >>> -                          peer_adev->gmc.xgmi.physical_node_id, num_links);
> >>> -                num_links = 0;
> >>> -        }
> >>> -
> >>> -        /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
> >>> -        return (num_links * 16 * 25000)/BITS_PER_BYTE;
> >>> -}
> >>> -
> >>>  int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
> >>>  {
> >>>          int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> >>> index 092dbd8bec97..28eb1cd0eb5a 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> >>> @@ -255,11 +255,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
> >>>                            uint64_t *bo_size, void *metadata_buffer,
> >>>                            size_t buffer_size, uint32_t *metadata_size,
> >>>                            uint32_t *flags, int8_t *xcp_id);
> >>> -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
> >>> -                                          struct amdgpu_device *src);
> >>> -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
> >>> -                                            struct amdgpu_device *src,
> >>> -                                            bool is_min);
> >>>  int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
> >>>  int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
> >>>                                               uint32_t *payload);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >>> index 74b4349e345a..d18d2a26cc91 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >>> @@ -818,28 +818,59 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
> >>>   * num_hops[2:0] = number of hops
> >>>   */
> >>>  int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> >>> -                struct amdgpu_device *peer_adev)
> >>> +                               struct amdgpu_device *peer_adev)
> >>>  {
> >>>          struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> >>>          uint8_t num_hops_mask = 0x7;
> >>>          int i;
> >>>
> >>> +        if (!adev->gmc.xgmi.supported)
> >>> +                return 0;
> >>> +
> >>>          for (i = 0 ; i < top->num_nodes; ++i)
> >>>                  if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> >>>                          return top->nodes[i].num_hops & num_hops_mask;
> >>> -        return -EINVAL;
> >>> +
> >>> +        DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d.\n",
> >>
> >> Suggest to use dev_ function to identify the device pci number. Since
> >> the function still passes, maybe info level is good enough.
> >
> > Ack'd. Will change.
> >
> >>
> >>> +                  adev->gmc.xgmi.physical_node_id,
> >>> +                  peer_adev->gmc.xgmi.physical_node_id);
> >>> +
> >>> +        return 0;
> >>>  }
> >>>
> >>> -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> >>> -                struct amdgpu_device *peer_adev)
> >>> +int amdgpu_xgmi_get_bandwidth_mbytes(struct amdgpu_device *adev,
> >>> +                                     struct amdgpu_device *peer_adev)
> >>>  {
> >>> -        struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> >>> -        int i;
> >>> +        int num_links = !peer_adev ? 1 : 0;
> >>>
> >>> -        for (i = 0 ; i < top->num_nodes; ++i)
> >>> -                if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> >>> -                        return top->nodes[i].num_links;
> >>> -        return -EINVAL;
> >>> +        if (!adev->gmc.xgmi.supported)
> >>> +                return 0;
> >>> +
> >>> +        if (peer_adev) {
> >>> +                struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> >>> +                int i;
> >>> +
> >>> +                for (i = 0 ; i < top->num_nodes; ++i) {
> >>> +                        if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id)
> >>> +                                continue;
> >>> +
> >>> +                        num_links = top->nodes[i].num_links;
> >>> +                        break;
> >>> +                }
> >>> +        }
> >>> +
> >>> +        /* num links returns 0 for indirect peers since indirect route is unknown. */
> >>> +        if (!num_links) {
> >>
> >> This looks problematic. I guess, it is better to keep the old way of
> >> passing min/max.
> >> Otherwise, there is a chance that min reports some value and max could report this error.
> >
> > I don't think this is a problem.  The old way of passing is_min == true is tied to peer_dev == NULL, which made it a redundant argument.
> > is_min == false doesn't prevent the issue you've mentioned from happening, because the old code effectively sets num_link = 0 if the peer_dev search fails anyways.
>
> The problem I was thinking about was with respect to usage in KFD properties. With this way, min_bandwidth will report some value even if no link is present between the devices. Max bandwidth will report 0.
> Ideally, we should report both min/max = 0 when there is no link present between the devices. I haven't checked if KFD does some other check to ensure that there are links between devices before calling the API.

The KFD only queries for links within a hive. Checking for max first implies a link on a non-zero return. We could change the KFD to set min to zero if max is zero then.

> >
> > With the line at the top of the proposed function:
> >>> +        int num_links = !peer_adev ? 1 : 0;
> > The semantics are as follows:
> > - NULL peer_dev indicates that the caller doesn't want peer-to-peer data to factor num_links into bandwidth reporting, so assume a single XGMI link in the bandwidth calculation.
>
> I think this semantics doesn't provide enough clarity. XGMI is an interconnect between devices and ideally we expect two devices to be passed to the API. If either one is NULL, then it's better to reject.
>
> > - If a failed peer_dev search ends up with num_links == 0, that means the caller passed in an invalid node (i.e. it's not one of the nodes that's been registered to the hive).
> > Currently, get_hops_count (old and proposed) operates in the same fashion too.
> > So the functionality between the proposed changes and the old changes should have remained the same.
>
> The API usage is clear and the general expectation is that the two devices are not NULL. That is clear in the case of get_hops_count.

I don't see where that expectation is set in the proposal. peer_dev == NULL is asking for a peerless check of potential bandwidth. If you're saying we need a redundant is_min bool to ignore the peer_dev num_links check, I'd say both are a waste of time for clarity's sake.

In that case, if we want to be strict about both peers being non-NULL, change the following:
- expose amdgpu_xgmi_get_num_links to KFD to mirror get_num_hops
- KFD calls amdgpu_xgmi_get_num_links to do some handling and later passes the return value into a new bandwidth call if it is non-zero
- change amdgpu_xgmi_get_bandwidth_mbytes(peer1, peer2) to amdgpu_xgmi_get_bandwidth(adev, const int num_links, enum unit_type) (rough sketch below)
  - adev -> used for future IP checks as a dynamic internal speed constant setter
  - const int num_links -> caller passes in a constant link multiplier. So in this case KFD can pass back the num_links it finds from the previous amdgpu_xgmi_get_num_links call
  - enum unit_type -> some enum we can define in amdgpu_xgmi.h to give back to the caller whatever units it's looking for (GT/s, Mbytes/sec, Mbps etc ...)

Then this way:
- KFD enforces min/max bandwidth to be consistent, i.e. both are 0 or both are non-zero
- the bandwidth call is now more general purpose and flexible and provides various bandwidth return types according to caller needs.
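Rough sketch of the idea - purely illustrative, the enum name, its values and the exact signature are placeholders rather than a final proposal:

enum amdgpu_xgmi_bw_unit {
        AMDGPU_XGMI_BW_UNIT_GBPS,               /* per-link speed in Gbps */
        AMDGPU_XGMI_BW_UNIT_MBYTES_SEC,         /* aggregate bandwidth in MB/s */
};

/*
 * adev is kept so the speed constants can later be selected per IP version;
 * for now assume x16 lanes at 25 Gbps per link, matching the existing math.
 */
int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, int num_links,
                              enum amdgpu_xgmi_bw_unit unit)
{
        switch (unit) {
        case AMDGPU_XGMI_BW_UNIT_GBPS:
                return num_links * 25;
        case AMDGPU_XGMI_BW_UNIT_MBYTES_SEC:
                return (num_links * 16 * 25000) / BITS_PER_BYTE;
        default:
                return 0;
        }
}

and on the KFD side, something along these lines:

        /* only report bandwidth when a direct link exists so min/max stay consistent */
        int num_links = amdgpu_xgmi_get_num_links(kdev->adev, peer_kdev->adev);

        if (num_links < 0)
                num_links = 0;
        sub_type_hdr->maximum_bandwidth_mbs = num_links ?
                amdgpu_xgmi_get_bandwidth(kdev->adev, num_links,
                                          AMDGPU_XGMI_BW_UNIT_MBYTES_SEC) : 0;
        sub_type_hdr->minimum_bandwidth_mbs = num_links ?
                amdgpu_xgmi_get_bandwidth(kdev->adev, 1,
                                          AMDGPU_XGMI_BW_UNIT_MBYTES_SEC) : 0;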
Jon

> > I'm open to adjusting get_num_hops & get_bandwidth_mbytes to return either -ENODEV (device not found in hive) or -EINVAL (bad peer_dev request) on a failed search.
>
> On a failed search, does returning 0 work - zero hop/bandwidth considered as no link found between the provided devices?
>
> Thanks,
> Lijo
>
> > That would require a change to KFD CRAT logic to error return itself on an error.
> > However, this would change the behaviour of a bandwidth reporting error where ROCm could still function, to the KFD not loading at all.
> > Currently, the consequence of passing an incorrect peer_dev to the bandwidth call would result in the ROCm Runtime not issuing an SDMA ganged copy i.e. potential peer-to-peer performance drop.
> >
> > Jon
> >
> >>
> >> Thanks,
> >> Lijo
> >>
> >>> +                DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d.\n",
> >>> +                          adev->gmc.xgmi.physical_node_id,
> >>> +                          peer_adev->gmc.xgmi.physical_node_id);
> >>> +        }
> >>> +
> >>> +        /*
> >>> +         * TBD - will update IP based bandwidth later.
> >>> +         * Bandwidth currently assumed to be x16 lanes x 25Gbps.
> >>> +         */
> >>> +        return (num_links * 16 * 25000)/BITS_PER_BYTE;
> >>>  }
> >>>
> >>>  bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> >>> index d1282b4c6348..325e7972469d 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> >>> @@ -62,10 +62,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
> >>>  int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
> >>>  int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
> >>>  int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
> >>> -int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> >>> -                struct amdgpu_device *peer_adev);
> >>> -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> >>> -                struct amdgpu_device *peer_adev);
> >>> +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
> >>> +int amdgpu_xgmi_get_bandwidth_mbytes(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
> >>>  bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
> >>>                  struct amdgpu_device *peer_adev);
> >>>  uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
> >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> >>> index 70b3ae0b74fe..a787d192390c 100644
> >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> >>> @@ -2133,8 +2133,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
> >>>          bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
> >>>          int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
> >>>                                          KFD_CRAT_INTRA_SOCKET_WEIGHT;
> >>> -        uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
> >>> -                                        kdev->adev, NULL, true) : mem_bw;
> >>> +        uint32_t bandwidth = ext_cpu ? amdgpu_xgmi_get_bandwidth_mbytes(kdev->adev, NULL) :
> >>> +                                        mem_bw;
> >>>
> >>>          /*
> >>>           * with host gpu xgmi link, host can access gpu memory whether
> >>> @@ -2198,12 +2198,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
> >>>
> >>>          if (use_ta_info) {
> >>>                  sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
> >>> -                        amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
> >>> +                        amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev);
> >>>                  sub_type_hdr->maximum_bandwidth_mbs =
> >>> -                        amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
> >>> -                                        peer_kdev->adev, false);
> >>> +                        amdgpu_xgmi_get_bandwidth_mbytes(kdev->adev, peer_kdev->adev);
> >>>                  sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
> >>> -                        amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
> >>> +                        amdgpu_xgmi_get_bandwidth_mbytes(kdev->adev, NULL) : 0;
> >>>          } else {
> >>>                  bool is_single_hop = kdev->kfd == peer_kdev->kfd;
> >>>                  int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
> >