Re: [PATCH v5 06/27] drm/amdgpu: Handle IOMMU enabled case.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2021-05-04 1:05 p.m., Felix Kuehling wrote:

Am 2021-04-28 um 11:11 a.m. schrieb Andrey Grodzovsky:
Handle all DMA IOMMU gropup related dependencies before the
group is removed.

v5: Drop IOMMU notifier and switch to lockless call to ttm_tt_unpopulate

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 31 ++++++++++++++++++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  3 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c    |  9 +++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 ++++++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/cik_ih.c        |  1 -
  drivers/gpu/drm/amd/amdgpu/cz_ih.c         |  1 -
  drivers/gpu/drm/amd/amdgpu/iceland_ih.c    |  1 -
  drivers/gpu/drm/amd/amdgpu/navi10_ih.c     |  3 ---
  drivers/gpu/drm/amd/amdgpu/si_ih.c         |  1 -
  drivers/gpu/drm/amd/amdgpu/tonga_ih.c      |  1 -
  drivers/gpu/drm/amd/amdgpu/vega10_ih.c     |  3 ---
  14 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index fddb82897e5d..30a24db5f4d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1054,6 +1054,8 @@ struct amdgpu_device {
bool in_pci_err_recovery;
  	struct pci_saved_state          *pci_state;
+
+	struct list_head                device_bo_list;
  };
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 46d646c40338..91594ddc2459 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,6 +70,7 @@
  #include <drm/task_barrier.h>
  #include <linux/pm_runtime.h>
+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -3211,7 +3212,6 @@ static const struct attribute *amdgpu_dev_attributes[] = {
  	NULL
  };
-
  /**
   * amdgpu_device_init - initialize the driver
   *
@@ -3316,6 +3316,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); + INIT_LIST_HEAD(&adev->device_bo_list);
+
  	adev->gfx.gfx_off_req_count = 1;
  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
@@ -3601,6 +3603,28 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  	return r;
  }
+static void amdgpu_clear_dma_mappings(struct amdgpu_device *adev)
+{
+	struct amdgpu_bo *bo = NULL;
+
+	/*
+	 * Unmaps all DMA mappings before device will be removed from it's
+	 * IOMMU group otherwise in case of IOMMU enabled system a crash
+	 * will happen.
+	 */
+
+	spin_lock(&adev->mman.bdev.lru_lock);
+	while (!list_empty(&adev->device_bo_list)) {
+		bo = list_first_entry(&adev->device_bo_list, struct amdgpu_bo, bo);
+		list_del_init(&bo->bo);
+		spin_unlock(&adev->mman.bdev.lru_lock);
+		if (bo->tbo.ttm)
+			ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);

I have a patch pending (reviewed by Christian) that moves the
dma-unmapping to amdgpu_ttm_backend_unbind. With that patch,
ttm_tt_unpopulate would no longer be the right way to remove the DMA
mapping.

Maybe I'd need to add a check in ttm_tt_unpopulate to call
backend_unbind first, if necessary. Or is there some other mechanism
that moves the BO to the CPU domain before unpopulating it?

Regards,
   Felix

At least in the context of this patch we don't move the BO to system
domain but rather preemptively remove DMA mappings before IOMMU grpoup
is gone post pci_remove. So yes, I would say yes, we need to check here
for backend_unbind first.

Andrey



+		spin_lock(&adev->mman.bdev.lru_lock);
+	}
+	spin_unlock(&adev->mman.bdev.lru_lock);
+}
+
  /**
   * amdgpu_device_fini - tear down the driver
   *
@@ -3639,12 +3663,15 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
  		amdgpu_ucode_sysfs_fini(adev);
  	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
-
  	amdgpu_fbdev_fini(adev);
amdgpu_irq_fini_hw(adev); amdgpu_device_ip_fini_early(adev);
+
+	amdgpu_clear_dma_mappings(adev);
+
+	amdgpu_gart_dummy_page_fini(adev);
  }
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index fde2d899b2c4..49cdcaf8512d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
   *
   * Frees the dummy page used by the driver (all asics).
   */
-static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
  {
  	if (!adev->dummy_page_addr)
  		return;
@@ -397,5 +397,4 @@ void amdgpu_gart_fini(struct amdgpu_device *adev)
  	vfree(adev->gart.pages);
  	adev->gart.pages = NULL;
  #endif
-	amdgpu_gart_dummy_page_fini(adev);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index afa2e2877d87..5678d9c105ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
  int amdgpu_gart_init(struct amdgpu_device *adev);
  void amdgpu_gart_fini(struct amdgpu_device *adev);
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
  int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
  		       int pages);
  int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 63e815c27585..a922154953a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -326,6 +326,15 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
  		if (!amdgpu_device_has_dc_support(adev))
  			flush_work(&adev->hotplug_work);
  	}
+
+	if (adev->irq.ih_soft.ring)
+		amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
+	if (adev->irq.ih.ring)
+		amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+	if (adev->irq.ih1.ring)
+		amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+	if (adev->irq.ih2.ring)
+		amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
  }
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 485f249d063a..62d829f5e62c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -68,8 +68,13 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
  		list_del_init(&bo->shadow_list);
  		mutex_unlock(&adev->shadow_list_lock);
  	}
-	amdgpu_bo_unref(&bo->parent);
+
+	spin_lock(&adev->mman.bdev.lru_lock);
+	list_del(&bo->bo);
+	spin_unlock(&adev->mman.bdev.lru_lock);
+
+	amdgpu_bo_unref(&bo->parent);
  	kfree(bo->metadata);
  	kfree(bo);
  }
@@ -585,6 +590,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
  	if (bp->type == ttm_bo_type_device)
  		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ INIT_LIST_HEAD(&bo->bo);
+
+	spin_lock(&adev->mman.bdev.lru_lock);
+	list_add_tail(&bo->bo, &adev->device_bo_list);
+	spin_unlock(&adev->mman.bdev.lru_lock);
+
  	return 0;
fail_unreserve:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9ac37569823f..5ae8555ef275 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -110,6 +110,8 @@ struct amdgpu_bo {
  	struct list_head		shadow_list;
struct kgd_mem *kfd_bo;
+
+	struct list_head		bo;
  };
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 183d44a6583c..df385ffc9768 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -310,7 +310,6 @@ static int cik_ih_sw_fini(void *handle)
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini_sw(adev);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
  	amdgpu_irq_remove_domain(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index d32743949003..b8c47e0cf37a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -302,7 +302,6 @@ static int cz_ih_sw_fini(void *handle)
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini_sw(adev);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
  	amdgpu_irq_remove_domain(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index da96c6013477..ddfe4eaeea05 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -301,7 +301,6 @@ static int iceland_ih_sw_fini(void *handle)
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini_sw(adev);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
  	amdgpu_irq_remove_domain(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 5eea4550b856..e171a9e78544 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -571,9 +571,6 @@ static int navi10_ih_sw_fini(void *handle)
amdgpu_irq_fini_sw(adev);
  	amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 751307f3252c..9a24f17a5750 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -176,7 +176,6 @@ static int si_ih_sw_fini(void *handle)
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini_sw(adev);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 973d80ec7f6c..b08905d1c00f 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -313,7 +313,6 @@ static int tonga_ih_sw_fini(void *handle)
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini_sw(adev);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
  	amdgpu_irq_remove_domain(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 2d0094c276ca..8c8abc00f710 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -525,9 +525,6 @@ static int vega10_ih_sw_fini(void *handle)
amdgpu_irq_fini_sw(adev);
  	amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
-	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
return 0;
  }
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux