[AMD Official Use Only]

> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@xxxxxxx>
> Sent: Monday, July 19, 2021 3:22 AM
> To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>
> Subject: Re: [PATCH 2/3] drm/amdkfd: report xgmi bandwidth between direct peers to the kfd
>
>
>
> On 7/16/2021 10:13 PM, Jonathan Kim wrote:
> > Report the min/max bandwidth in megabytes to the kfd for direct xgmi
> > connections only.
> >
> > v2: change reporting from num links to bandwidth
> >
> > Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 23 ++++++++++++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 12 +++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 ++
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c      | 12 +++++++++++
> >  5 files changed, 50 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index bfab2f9fdd17..3978578a1c49 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -553,6 +553,29 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
> >  	return (uint8_t)ret;
> >  }
> >
> > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst,
> > +struct kgd_dev *src, bool is_min) {
> > +	struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev;
> > +	int num_links;
> > +
> > +	if (adev->asic_type != CHIP_ALDEBARAN)
> > +		return 0;
> > +
> > +	if (src)
> > +		peer_adev = (struct amdgpu_device *)src;
> > +
> > +	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
> > +	if (num_links < 0) {
> > +		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d.
ret = %d\n",
> > +			  adev->gmc.xgmi.physical_node_id,
> > +			  peer_adev->gmc.xgmi.physical_node_id, num_links);
> > +		num_links = 0;
> > +	}
> > +
> > +	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
> > +	return (num_links * 16 * 25000)/BITS_PER_BYTE;
>
> Instead of having ASIC family checks and bandwidth info in interface
> functions, better to have this info come from base layer (amdgpu_xgmi or
> xgmi ip). That will help to handle other ASICs.

Ok. We can revisit this as a follow up. Maybe the full solution is a
link width/speed support mask analogous to pcie.

Thanks,

Jon

> Thanks,
> Lijo
>
> >  uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
> >  {
> >  	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 81264517d532..e12fccb2d2c4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -226,6 +226,7 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
> >  uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
> >  int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);
> >  uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst,
> > 					  struct kgd_dev *src);
> > +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst,
> > +struct kgd_dev *src, bool is_min);
> >
> >  /* Read user wptr from a specified user address space with page fault
> >   * disabled.
The memory must be pinned and mapped to the hardware when
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > index 8567d5d77346..258cf86b32f6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> > @@ -486,6 +486,18 @@ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> >  	return -EINVAL;
> >  }
> >
> > +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> > +		struct amdgpu_device *peer_adev)
> > +{
> > +	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> > +	int i;
> > +
> > +	for (i = 0 ; i < top->num_nodes; ++i)
> > +		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> > +			return top->nodes[i].num_links;
> > +	return -EINVAL;
> > +}
> > +
> >  int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> >  {
> >  	struct psp_xgmi_topology_info *top_info;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> > index 12969c0830d5..d2189bf7d428 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> > @@ -59,6 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
> >  int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
> >  int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> > 		struct amdgpu_device *peer_adev);
> > +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> > +		struct amdgpu_device *peer_adev);
> >  uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
> > 					   uint64_t addr);
> >  static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > index c6b02aee4993..40ce6239c813 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > @@ -1989,6 +1989,13 @@ static int
kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
> >  			sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
> >  			sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
> >  			sub_type_hdr->num_hops_xgmi = 1;
> > +			if (adev->asic_type == CHIP_ALDEBARAN) {
> > +				sub_type_hdr->minimum_bandwidth_mbs =
> > +					amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
> > +							kdev->kgd, NULL, true);
> > +				sub_type_hdr->maximum_bandwidth_mbs =
> > +					sub_type_hdr->minimum_bandwidth_mbs;
> > +			}
> >  		} else {
> >  			sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
> >  		}
> > @@ -2033,6 +2040,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
> >  	sub_type_hdr->proximity_domain_to = proximity_domain_to;
> >  	sub_type_hdr->num_hops_xgmi =
> >  		amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
> > +	sub_type_hdr->maximum_bandwidth_mbs =
> > +		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false);
> > +	sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
> > +		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0;
> > +
> >  	return 0;
> >  }
> >

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx