On 11/21/2018 02:29 PM, Alex Deucher wrote: > On Wed, Nov 21, 2018 at 1:11 PM Andrey Grodzovsky > <andrey.grodzovsky@xxxxxxx> wrote: >> This is prep work for updating each PSP FW in hive after >> GPU reset. >> Split into build topology SW state and update each PSP FW in the hive. >> Save topology and count of XGMI devices for reuse. >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 55 +++++++++++++++++++------------- >> 2 files changed, 38 insertions(+), 22 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 2c80453..3e5bede 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -1226,6 +1226,11 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, >> /* >> * functions used by amdgpu_xgmi.c >> */ >> + >> +struct amdgpu_hive_info; >> + >> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); >> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive); >> int amdgpu_xgmi_add_device(struct amdgpu_device *adev); > We should move these to their own header, amdgpu_xgmi.h, rather than > dumping them in amdgpu.h > >> /* >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c >> index 909216a..23e4e16 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c >> @@ -34,12 +34,14 @@ static DEFINE_MUTEX(xgmi_mutex); >> struct amdgpu_hive_info { >> uint64_t hive_id; >> struct list_head device_list; >> + struct psp_xgmi_topology_info topology_info; >> + int number_devices; >> }; >> >> static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE]; >> static unsigned hive_count = 0; >> >> -static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) >> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) > Any reason to make this public? It is used in Patch 5. Andrey > >> { >> int i; >> struct amdgpu_hive_info *tmp; >> @@ -61,12 +63,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) >> return tmp; >> } >> >> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) >> +{ >> + int ret = -EINVAL; >> + >> + /* Each psp need to set the latest topology */ >> + ret = psp_xgmi_set_topology_info(&adev->psp, >> + hive->number_devices, >> + &hive->topology_info); >> + if (ret) >> + dev_err(adev->dev, >> + "XGMI: Set topology failure on device %llx, hive %llx, ret %d", >> + adev->gmc.xgmi.node_id, >> + adev->gmc.xgmi.hive_id, ret); >> + else >> + dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n", >> + adev->gmc.xgmi.physical_node_id, >> + adev->gmc.xgmi.hive_id); >> + >> + return ret; >> +} > Indentation in this function looks wrong. > >> + >> int amdgpu_xgmi_add_device(struct amdgpu_device *adev) >> { >> - struct psp_xgmi_topology_info *tmp_topology; >> + struct psp_xgmi_topology_info *hive_topology; >> struct amdgpu_hive_info *hive; >> struct amdgpu_xgmi *entry; >> - struct amdgpu_device *tmp_adev; >> + struct amdgpu_device *tmp_adev = NULL; >> >> int count = 0, ret = -EINVAL; >> >> @@ -76,21 +99,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) >> adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp); >> adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); >> >> - tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL); >> - if (!tmp_topology) >> - return -ENOMEM; >> mutex_lock(&xgmi_mutex); >> hive = amdgpu_get_xgmi_hive(adev); >> if (!hive) >> goto exit; >> >> + hive_topology = &hive->topology_info; >> + >> list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); >> list_for_each_entry(entry, &hive->device_list, head) >> - tmp_topology->nodes[count++].node_id = entry->node_id; >> + hive_topology->nodes[count++].node_id = entry->node_id; >> + hive->number_devices = count; >> >> /* Each psp need to get the latest topology */ >> list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { >> - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology); >> + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); >> if (ret) { >> dev_err(tmp_adev->dev, >> "XGMI: Get topology failure on device %llx, hive %llx, ret %d", >> @@ -101,25 +124,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) >> } >> } >> >> - /* Each psp need to set the latest topology */ >> list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { >> - ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology); >> - if (ret) { >> - dev_err(tmp_adev->dev, >> - "XGMI: Set topology failure on device %llx, hive %llx, ret %d", >> - tmp_adev->gmc.xgmi.node_id, >> - tmp_adev->gmc.xgmi.hive_id, ret); >> - /* To do : continue with some node failed or disable the whole hive */ >> + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); >> + if (ret) >> break; >> - } >> } >> - if (!ret) >> - dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n", >> - adev->gmc.xgmi.physical_node_id, >> - adev->gmc.xgmi.hive_id); >> >> exit: >> mutex_unlock(&xgmi_mutex); >> - kfree(tmp_topology); >> return ret; >> } >> -- >> 2.7.4 >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx@xxxxxxxxxxxxxxxxxxxxx >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx