Am 2021-05-19 um 11:20 p.m. schrieb Andrey Grodzovsky: > Use it to call display code dependent on device->drv_data > before it's set to NULL on device unplug > > v5: > Move HW finalization into this callback to prevent MMIO accesses > post PCI remove. > > v7: > Split kfd suspend from device exit to expedite HW related > stuff to amdgpu_pci_remove > > v8: > Squash previous KFD commit into this commit to avoid compile break. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> > Acked-by: Christian König <christian.koenig@xxxxxxx> See one cosmetic comment inline. With that fixed the patch is Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59 +++++++++++++------ > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +- > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++- > drivers/gpu/drm/amd/include/amd_shared.h | 2 + > 6 files changed, 56 insertions(+), 24 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > index 5f6696a3c778..2b06dee9a0ce 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > @@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) > } > } > > -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) > +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev) > { > if (adev->kfd.dev) { > kgd2kfd_device_exit(adev->kfd.dev); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 5ffb07b02810..d8a537e8aea5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, > const void *ih_ring_entry); > void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
> void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); > -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); > +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev); > int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, > uint32_t vmid, uint64_t gpu_addr, > uint32_t *ib_cmd, uint32_t ib_len); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 8bee95ad32d9..bc75e35dd8d8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) > return 0; > } > > -/** > - * amdgpu_device_ip_fini - run fini for hardware IPs > - * > - * @adev: amdgpu_device pointer > - * > - * Main teardown pass for hardware IPs. The list of all the hardware > - * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks > - * are run. hw_fini tears down the hardware associated with each IP > - * and sw_fini tears down any software state associated with each IP. > - * Returns 0 on success, negative error code on failure. 
> - */ > -static int amdgpu_device_ip_fini(struct amdgpu_device *adev) > +static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) > { > int i, r; > > - if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) > - amdgpu_virt_release_ras_err_handler_data(adev); > + for (i = 0; i < adev->num_ip_blocks; i++) { > + if (!adev->ip_blocks[i].version->funcs->early_fini) > + continue; > > - amdgpu_ras_pre_fini(adev); > + r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); > + if (r) { > + DRM_DEBUG("early_fini of IP block <%s> failed %d\n", > + adev->ip_blocks[i].version->funcs->name, r); > + } > + } > > - if (adev->gmc.xgmi.num_physical_nodes > 1) > - amdgpu_xgmi_remove_device(adev); > + amdgpu_amdkfd_suspend(adev, false); > > amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); > amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); > > - amdgpu_amdkfd_device_fini(adev); > - > /* need to disable SMC first */ > for (i = 0; i < adev->num_ip_blocks; i++) { > if (!adev->ip_blocks[i].status.hw) > @@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) > adev->ip_blocks[i].status.hw = false; > } > > + return 0; > +} > + > +/** > + * amdgpu_device_ip_fini - run fini for hardware IPs > + * > + * @adev: amdgpu_device pointer > + * > + * Main teardown pass for hardware IPs. The list of all the hardware > + * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks > + * are run. hw_fini tears down the hardware associated with each IP > + * and sw_fini tears down any software state associated with each IP. > + * Returns 0 on success, negative error code on failure. 
> + */ > +static int amdgpu_device_ip_fini(struct amdgpu_device *adev) > +{ > + int i, r; > + > + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) > + amdgpu_virt_release_ras_err_handler_data(adev); > + > + amdgpu_ras_pre_fini(adev); > + > + if (adev->gmc.xgmi.num_physical_nodes > 1) > + amdgpu_xgmi_remove_device(adev); > + > + amdgpu_amdkfd_device_fini_sw(adev); > > for (i = adev->num_ip_blocks - 1; i >= 0; i--) { > if (!adev->ip_blocks[i].status.sw) > @@ -3681,6 +3700,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) > amdgpu_fbdev_fini(adev); > > amdgpu_irq_fini_hw(adev); > + > + amdgpu_device_ip_fini_early(adev); > } > > void amdgpu_device_fini_sw(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > index 357b9bf62a1c..ab6d2a43c9a3 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > @@ -858,10 +858,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, > return kfd->init_complete; > } > > + > + > void kgd2kfd_device_exit(struct kfd_dev *kfd) Unnecessary whitespace change. 
Regards, Felix > { > if (kfd->init_complete) { > - kgd2kfd_suspend(kfd, false); > device_queue_manager_uninit(kfd->dqm); > kfd_interrupt_exit(kfd); > kfd_topology_remove_device(kfd); > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index 9ca517b65854..f7112865269a 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -1251,6 +1251,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) > return -EINVAL; > } > > +static int amdgpu_dm_early_fini(void *handle) > +{ > + struct amdgpu_device *adev = (struct amdgpu_device *)handle; > + > + amdgpu_dm_audio_fini(adev); > + > + return 0; > +} > + > static void amdgpu_dm_fini(struct amdgpu_device *adev) > { > int i; > @@ -1259,8 +1268,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) > drm_encoder_cleanup(&adev->dm.mst_encoders[i].base); > } > > - amdgpu_dm_audio_fini(adev); > - > amdgpu_dm_destroy_drm_device(&adev->dm); > > #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) > @@ -2298,6 +2305,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { > .late_init = dm_late_init, > .sw_init = dm_sw_init, > .sw_fini = dm_sw_fini, > + .early_fini = amdgpu_dm_early_fini, > .hw_init = dm_hw_init, > .hw_fini = dm_hw_fini, > .suspend = dm_suspend, > diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h > index 43ed6291b2b8..1ad56da486e4 100644 > --- a/drivers/gpu/drm/amd/include/amd_shared.h > +++ b/drivers/gpu/drm/amd/include/amd_shared.h > @@ -240,6 +240,7 @@ enum amd_dpm_forced_level; > * @late_init: sets up late driver/hw state (post hw_init) - Optional > * @sw_init: sets up driver state, does not configure hw > * @sw_fini: tears down driver state, does not configure hw > + * @early_fini: tears down stuff before dev detached from driver > * @hw_init: sets up the hw state > * @hw_fini: tears down the hw state > * @late_fini: final cleanup 
> @@ -268,6 +269,7 @@ struct amd_ip_funcs { > int (*late_init)(void *handle); > int (*sw_init)(void *handle); > int (*sw_fini)(void *handle); > + int (*early_fini)(void *handle); > int (*hw_init)(void *handle); > int (*hw_fini)(void *handle); > void (*late_fini)(void *handle);