On Wed, Nov 21, 2018 at 1:11 PM Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> wrote: > > This is prep work for updating each PSP FW in hive after > GPU reset. > Split into build topology SW state and update each PSP FW in the hive. > Save topology and count of XGMI devices for reuse. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 55 +++++++++++++++++++------------- > 2 files changed, 38 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 2c80453..3e5bede 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1226,6 +1226,11 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, > /* > * functions used by amdgpu_xgmi.c > */ > + > +struct amdgpu_hive_info; > + > +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); > +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive); > int amdgpu_xgmi_add_device(struct amdgpu_device *adev); We should move these to their own header, amdgpu_xgmi.h, rather than dumping them in amdgpu.h > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > index 909216a..23e4e16 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > @@ -34,12 +34,14 @@ static DEFINE_MUTEX(xgmi_mutex); > struct amdgpu_hive_info { > uint64_t hive_id; > struct list_head device_list; > + struct psp_xgmi_topology_info topology_info; > + int number_devices; > }; > > static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE]; > static unsigned hive_count = 0; > > -static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) > +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) Any reason to make this public? > { > int i; > struct amdgpu_hive_info *tmp; > @@ -61,12 +63,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) > return tmp; > } > > +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) > +{ > + int ret = -EINVAL; > + > + /* Each psp need to set the latest topology */ > + ret = psp_xgmi_set_topology_info(&adev->psp, > + hive->number_devices, > + &hive->topology_info); > + if (ret) > + dev_err(adev->dev, > + "XGMI: Set topology failure on device %llx, hive %llx, ret %d", > + adev->gmc.xgmi.node_id, > + adev->gmc.xgmi.hive_id, ret); > + else > + dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n", > + adev->gmc.xgmi.physical_node_id, > + adev->gmc.xgmi.hive_id); > + > + return ret; > +} Indentation in this function looks wrong. > + > int amdgpu_xgmi_add_device(struct amdgpu_device *adev) > { > - struct psp_xgmi_topology_info *tmp_topology; > + struct psp_xgmi_topology_info *hive_topology; > struct amdgpu_hive_info *hive; > struct amdgpu_xgmi *entry; > - struct amdgpu_device *tmp_adev; > + struct amdgpu_device *tmp_adev = NULL; > > int count = 0, ret = -EINVAL; > > @@ -76,21 +99,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) > adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp); > adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); > > - tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL); > - if (!tmp_topology) > - return -ENOMEM; > mutex_lock(&xgmi_mutex); > hive = amdgpu_get_xgmi_hive(adev); > if (!hive) > goto exit; > > + hive_topology = &hive->topology_info; > + > list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); > list_for_each_entry(entry, &hive->device_list, head) > - tmp_topology->nodes[count++].node_id = entry->node_id; > + hive_topology->nodes[count++].node_id = entry->node_id; > + hive->number_devices = count; > > /* Each psp need to get the latest topology */ > list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { > - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology); > + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); > if (ret) { > dev_err(tmp_adev->dev, > "XGMI: Get topology failure on device %llx, hive %llx, ret %d", > @@ -101,25 +124,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) > } > } > > - /* Each psp need to set the latest topology */ > list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { > - ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology); > - if (ret) { > - dev_err(tmp_adev->dev, > - "XGMI: Set topology failure on device %llx, hive %llx, ret %d", > - tmp_adev->gmc.xgmi.node_id, > - tmp_adev->gmc.xgmi.hive_id, ret); > - /* To do : continue with some node failed or disable the whole hive */ > + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); > + if (ret) > break; > - } > } > - if (!ret) > - dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n", > - adev->gmc.xgmi.physical_node_id, > - adev->gmc.xgmi.hive_id); > > exit: > mutex_unlock(&xgmi_mutex); > - kfree(tmp_topology); > return ret; > } > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx