Am 20.01.21 um 20:38 schrieb Andrey Grodzovsky:
Ping Andrey On 1/20/21 12:01 AM, Andrey Grodzovsky wrote:On 1/19/21 3:48 AM, Christian König wrote:Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:Handle all DMA IOMMU gropup related dependencies before the group is removed. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++++drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 ++ 6 files changed, 65 insertions(+), 1 deletion(-)diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.hindex 478a7d8..2953420 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -51,6 +51,7 @@ #include <linux/dma-fence.h> #include <linux/pci.h> #include <linux/aer.h> +#include <linux/notifier.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> @@ -1041,6 +1042,10 @@ struct amdgpu_device { bool in_pci_err_recovery; struct pci_saved_state *pci_state; + + struct notifier_block nb; + struct blocking_notifier_head notifier; + struct list_head device_bo_list; };static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.cindex 45e23e3..e99f4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -70,6 +70,8 @@ #include <drm/task_barrier.h> #include <linux/pm_runtime.h> +#include <linux/iommu.h> + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");@@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {}; +static int amdgpu_iommu_group_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);+ struct amdgpu_bo *bo = NULL; + + /*+ * Following is a set of IOMMU group dependencies taken care of before+ * device's IOMMU group is removed + */ + if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) { + + spin_lock(&ttm_bo_glob.lru_lock); + list_for_each_entry(bo, &adev->device_bo_list, bo) { + if (bo->tbo.ttm) + ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm); + } + spin_unlock(&ttm_bo_glob.lru_lock);That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.You need to use a mutex here or even better make sure you can access the device_bo_list without a lock in this moment.Christian.I can think of switching to RCU list ? Otherwise, elements are addedon BO create and deleted on BO destroy, how can i prevent any of those from happening while in this section besides mutex ? Make a copy list and run over it instead ?
RCU won't work since the BO is not RCU protected. What you can try something like this: spin_lock(&ttm_bo_glob.lru_lock); while (list_not_empty(&adev->device_bo_list)) { bo = list_first_entry(&adev->device_bo_list); list_del(bo->...); spin_unlock(&ttm_bo_glob.lru_lock); ttm_tt_unpopulate(bo); spin_lock(&ttm_bo_glob.lru_lock); }... Regards, Christian.
Andrey+ + if (adev->irq.ih.use_bus_addr) + amdgpu_ih_ring_fini(adev, &adev->irq.ih); + if (adev->irq.ih1.use_bus_addr) + amdgpu_ih_ring_fini(adev, &adev->irq.ih1); + if (adev->irq.ih2.use_bus_addr) + amdgpu_ih_ring_fini(adev, &adev->irq.ih2); + + amdgpu_gart_dummy_page_fini(adev); + } + + return NOTIFY_OK; +} + + /** * amdgpu_device_init - initialize the driver *@@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);+ INIT_LIST_HEAD(&adev->device_bo_list); + adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0;@@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,if (amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(pdev); + BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier); + adev->nb.notifier_call = amdgpu_iommu_group_notifier; + + if (adev->dev->iommu_group) {+ r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);+ if (r) + goto failed; + } + return 0; failed:diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.cindex 0db9330..486ad6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)* * Frees the dummy page used by the driver (all asics). */ -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { if (!adev->dummy_page_addr) return;diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.hindex afa2e28..5678d9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h@@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_fini(struct amdgpu_device *adev); +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.cindex 6cc9919..4a1de69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c@@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)} amdgpu_bo_unref(&bo->parent); + spin_lock(&ttm_bo_glob.lru_lock); + list_del(&bo->bo); + spin_unlock(&ttm_bo_glob.lru_lock); + kfree(bo->metadata); kfree(bo); }@@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,if (bp->type == ttm_bo_type_device) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + INIT_LIST_HEAD(&bo->bo); + + spin_lock(&ttm_bo_glob.lru_lock); + list_add_tail(&bo->bo, &adev->device_bo_list); + spin_unlock(&ttm_bo_glob.lru_lock); + return 0; fail_unreserve:diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.hindex 9ac3756..5ae8555 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -110,6 +110,8 @@ struct amdgpu_bo { struct list_head shadow_list; struct kgd_mem *kfd_bo; + + struct list_head bo; };static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)_______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxxhttps://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Candrey.grodzovsky%40amd.com%7C0c703eb6e73744962d3b08d8bc56f303%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637466428923905672%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=2Tkz4EMOEwFLQJUOk1ixd28c2ad1HqjBVIDO%2FX0OgqM%3D&reserved=0
_______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx