It seems to me that amdgpu_hive_info is a driver-internal structure, but the psp_xpmi_topology structures are an interface with the PSP that may change in future ASIC generations. So on second thought, adding the psp_xgmi_topology structures to the psp_xgmi_context (or amdgpu_hive_info) like that is probably a bad idea. The structures should probably be defined only in psp_v11_0.c and opaque for the rest of the driver. Anyway, this is getting into a bigger cleanup that is not directly related to this change. We'll probably have to deal with this sooner or later, when a new PSP version changes the XGMI interfaces. Either way, the series is Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> On 2019-04-23 4:21 p.m., Liu, Shaoyun wrote: > KFD need to provide the info for upper level to determine the data path > > Change-Id: Idc809e8f3381b9222dd7be96539522d440f3ee7d > Signed-off-by: shaoyunl <shaoyun.liu@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 26 ++++++++++++++------------ > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 23 ++++++++++++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 3 ++- > 5 files changed, 50 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > index acf8ae0..8f8523a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > @@ -27,6 +27,7 @@ > #include "amdgpu_gfx.h" > #include <linux/module.h> > #include <linux/dma-buf.h> > +#include "amdgpu_xgmi.h" > > static const unsigned int compute_vmid_bitmap = 0xFF00; > > @@ -481,6 +482,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) > > return adev->gmc.xgmi.hive_id; > } > +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) > +{ > + struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; > + struct amdgpu_device *adev = (struct amdgpu_device *)dst; > + int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); > + > + if (ret < 0) { > + DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", > + adev->gmc.xgmi.physical_node_id, > + peer_adev->gmc.xgmi.physical_node_id, ret); > + ret = 0; > + } > + return (uint8_t)ret; > +} > > int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, > uint32_t vmid, uint64_t gpu_addr, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index e6a5037..b0cb94d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -154,6 +154,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, > uint32_t *flags); > uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); > uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); > +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); > > #define read_user_wptr(mmptr, wptr, dst) \ > ({ \ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > index cde113f..acbc18b5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > @@ -95,12 +95,26 @@ struct psp_funcs > int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); > }; > > +#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 > +struct psp_xgmi_node_info { > + uint64_t node_id; > + uint8_t num_hops; > + uint8_t is_sharing_enabled; > + enum ta_xgmi_assigned_sdma_engine sdma_engine; > +}; > + > +struct psp_xgmi_topology_info { > + uint32_t num_nodes; > + struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; > +}; > + > struct psp_xgmi_context { > uint8_t initialized; > uint32_t session_id; > struct amdgpu_bo *xgmi_shared_bo; > uint64_t xgmi_shared_mc_addr; > void *xgmi_shared_buf; > + struct psp_xgmi_topology_info top_info; > }; > > struct psp_ras_context { > @@ -181,18 +195,6 @@ struct amdgpu_psp_funcs { > enum AMDGPU_UCODE_ID); > }; > > -#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 > -struct psp_xgmi_node_info { > - uint64_t node_id; > - uint8_t num_hops; > - uint8_t is_sharing_enabled; > - enum ta_xgmi_assigned_sdma_engine sdma_engine; > -}; > - > -struct psp_xgmi_topology_info { > - uint32_t num_nodes; > - struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; > -}; > > #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) > #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > index a48c84c..04dfc8b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > @@ -238,7 +238,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev > /* Each psp need to set the latest topology */ > ret = psp_xgmi_set_topology_info(&adev->psp, > hive->number_devices, > - &hive->topology_info); > + &adev->psp.xgmi_context.top_info); > if (ret) > dev_err(adev->dev, > "XGMI: Set topology failure on device %llx, hive %llx, ret %d", > @@ -248,9 +248,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev > return ret; > } > > + > +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, > + struct amdgpu_device *peer_adev) > +{ > + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; > + int i; > + > + for (i = 0 ; i < top->num_nodes; ++i) > + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) > + return top->nodes[i].num_hops; > + return -EINVAL; > +} > + > int amdgpu_xgmi_add_device(struct amdgpu_device *adev) > { > - struct psp_xgmi_topology_info *hive_topology; > + struct psp_xgmi_topology_info *top_info; > struct amdgpu_hive_info *hive; > struct amdgpu_xgmi *entry; > struct amdgpu_device *tmp_adev = NULL; > @@ -283,16 +296,16 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) > goto exit; > } > > - hive_topology = &hive->topology_info; > + top_info = &adev->psp.xgmi_context.top_info; > > list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); > list_for_each_entry(entry, &hive->device_list, head) > - hive_topology->nodes[count++].node_id = entry->node_id; > + top_info->nodes[count++].node_id = entry->node_id; > hive->number_devices = count; > > /* Each psp need to get the latest topology */ > list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { > - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); > + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info); > if (ret) { > dev_err(tmp_adev->dev, > "XGMI: Get topology failure on device %llx, hive %llx, ret %d", > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > index 3e9c91e..fbcee31 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > @@ -27,7 +27,6 @@ > struct amdgpu_hive_info { > uint64_t hive_id; > struct list_head device_list; > - struct psp_xgmi_topology_info topology_info; > int number_devices; > struct mutex hive_lock, reset_lock; > struct kobject *kobj; > @@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev > int amdgpu_xgmi_add_device(struct amdgpu_device *adev); > void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); > int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); > +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, > + struct amdgpu_device *peer_adev); > > static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, > struct amdgpu_device *bo_adev) _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx