Am 2021-07-16 um 12:43 p.m. schrieb Jonathan Kim: > Report the min/max bandwidth in megabytes to the kfd for direct > xgmi connections only. By "direct XGMI connections", you mean this doesn't work for links with more than one hop? Will that spew out DRM_ERROR messages for such links? Then it's probably better to downgrade that to an INFO. > > v2: change reporting from num links to bandwidth > > Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> This patch is OK to provide bandwidth information on Aldebaran. What can we do on older GPUs? Can we assume num_links = 1? Or maybe have some hard-coded numbers depending on the number of nodes in the hive? Either way, patch 1 and 2 are Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 23 ++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 12 +++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 2 ++ > drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 12 +++++++++++ > 5 files changed, 50 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > index bfab2f9fdd17..3978578a1c49 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > @@ -553,6 +553,29 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s > return (uint8_t)ret; > } > > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min) > +{ > + struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev; > + int num_links; > + > + if (adev->asic_type != CHIP_ALDEBARAN) > + return 0; > + > + if (src) > + peer_adev = (struct amdgpu_device *)src; > + > + num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); > + if (num_links < 0) { > + DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n", > + adev->gmc.xgmi.physical_node_id, > + peer_adev->gmc.xgmi.physical_node_id, num_links); > + num_links = 0; > + } > + > + /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */ > + return (num_links * 16 * 25000)/BITS_PER_BYTE; > +} > + > uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) > { > struct amdgpu_device *adev = (struct amdgpu_device *)kgd; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 81264517d532..e12fccb2d2c4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -226,6 +226,7 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); > uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); > int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); > uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min); > > /* Read user wptr from a specified user address space with page fault > * disabled. The memory must be pinned and mapped to the hardware when > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > index 8567d5d77346..258cf86b32f6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > @@ -486,6 +486,18 @@ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, > return -EINVAL; > } > > +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, > + struct amdgpu_device *peer_adev) > +{ > + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; > + int i; > + > + for (i = 0 ; i < top->num_nodes; ++i) > + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) > + return top->nodes[i].num_links; > + return -EINVAL; > +} > + > int amdgpu_xgmi_add_device(struct amdgpu_device *adev) > { > struct psp_xgmi_topology_info *top_info; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > index 12969c0830d5..d2189bf7d428 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > @@ -59,6 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev); > int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); > int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, > struct amdgpu_device *peer_adev); > +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, > + struct amdgpu_device *peer_adev); > uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, > uint64_t addr); > static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > index c6b02aee4993..40ce6239c813 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > @@ -1989,6 +1989,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, > sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; > sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; > sub_type_hdr->num_hops_xgmi = 1; > + if (adev->asic_type == CHIP_ALDEBARAN) { > + sub_type_hdr->minimum_bandwidth_mbs = > + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( > + kdev->kgd, NULL, true); > + sub_type_hdr->maximum_bandwidth_mbs = > + sub_type_hdr->minimum_bandwidth_mbs; > + } > } else { > sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; > } > @@ -2033,6 +2040,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, > sub_type_hdr->proximity_domain_to = proximity_domain_to; > sub_type_hdr->num_hops_xgmi = > amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); > + sub_type_hdr->maximum_bandwidth_mbs = > + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false); > + sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? > + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0; > + > return 0; > } > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx