Re: [PATCH] drm/amdgpu: protect kiq overrun

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 22.04.20 um 16:50 schrieb Yintian Tao:
Wait for the oldest sequence on the kiq ring
to be signaled in order to make sure there
will be no kiq overrun.

v2: remove unused the variable and correct
     kiq max_sub_num value

First of all this should probably be added to the fence handling code and not the kiq code.

Then you are kind of duplicating some of the functionality we have in the ring handling here. Probably better to avoid this, see amdgpu_fence_driver_init_ring() as well. That's also why I suggested to use the num_fences_mask value.

Regards,
Christian.


Signed-off-by: Yintian Tao <yttao@xxxxxxx>
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    |  6 ++++
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 ++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       | 30 +++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |  3 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |  6 ++++
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c         |  6 ++++
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |  7 +++++
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         |  7 +++++
  8 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..fac8b9713dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
  		 mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq.ring_lock);
+	r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+	if (r) {
+		pr_err("critical bug! too many kiq submission\n");
+		goto out_unlock;
+	}
+
  	r = amdgpu_ring_alloc(kiq_ring, 7);
  	if (r) {
  		pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..fd42c126510f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
  		 mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq.ring_lock);
+	r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+	if (r) {
+		pr_err("critical bug! too many kiq submissions\n");
+		goto out_unlock;
+	}
+
  	r = amdgpu_ring_alloc(kiq_ring, 7);
  	if (r) {
  		pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..84e66c45df37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
  			     AMDGPU_RING_PRIO_DEFAULT);
  	if (r)
  		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+	else
+		kiq->max_sub_num = (ring->ring_size / 4) /
+				(ring->funcs->align_mask + 1);
return r;
  }
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
  	return 0;
  }
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq)
+{
+	uint32_t seq = 0;
+	signed long r = 0;
+
+	seq = abs(kiq->ring.fence_drv.sync_seq - kiq->max_sub_num);
+	if (seq > kiq->max_sub_num) {
+		r = amdgpu_fence_wait_polling(&kiq->ring, seq,
+					      MAX_KIQ_REG_WAIT);
+		return r < 1 ? -ETIME : 0;
+	}
+
+	return 0;
+}
+
  uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
  {
  	signed long r, cnt = 0;
@@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
  	BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+	r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+	if (r) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		goto failed_kiq_read;
+	}
+
  	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
  		spin_unlock_irqrestore(&kiq->ring_lock, flags);
  		pr_err("critical bug! too many kiq readers\n");
@@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
  	BUG_ON(!ring->funcs->emit_wreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+	r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+	if (r) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		goto failed_kiq_write;
+	}
+
  	amdgpu_ring_alloc(ring, 32);
  	amdgpu_ring_emit_wreg(ring, reg, v);
  	amdgpu_fence_emit_polling(ring, &seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ee698f0246d8..1ee59a927bd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -103,6 +103,7 @@ struct amdgpu_kiq {
  	struct amdgpu_ring	ring;
  	struct amdgpu_irq_src	irq;
  	const struct kiq_pm4_funcs *pmf;
+	uint32_t		max_sub_num;
  };
/*
@@ -387,4 +388,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
  				  struct amdgpu_iv_entry *entry);
  uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
  void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8c10084f44ef..4b027006d072 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -57,6 +57,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
  	uint32_t seq;
spin_lock_irqsave(&kiq->ring_lock, flags);
+	r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+	if (r) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		goto failed_kiq;
+	}
+
  	amdgpu_ring_alloc(ring, 32);
  	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
  					    ref, mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5b1549f167b0..a136e2229f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4051,6 +4051,12 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
  	BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+	r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+	if (r) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		goto failed_kiq_read;
+	}
+
  	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
  		spin_unlock_irqrestore(&kiq->ring_lock, flags);
  		pr_err("critical bug! too many kiq readers\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 30b75d79efdb..77d8bc9c0111 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -423,6 +423,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
  		spin_lock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+		if (r) {
+			spin_unlock(&kiq->ring_lock);
+			DRM_ERROR("too many kiq submissions\n");
+			return -ETIME;
+		}
+
  		/* 2 dwords flush + 8 dwords fence */
  		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
  		kiq->pmf->kiq_invalidate_tlbs(ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fecdbc471983..c429a2a5fe3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -614,6 +614,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
  			ndw += kiq->pmf->invalidate_tlbs_size;
spin_lock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+		if (r) {
+			spin_unlock(&kiq->ring_lock);
+			DRM_ERROR("critical bug! too many kiq submissions\n");
+			return -ETIME;
+		}
+
  		/* 2 dwords flush + 8 dwords fence */
  		amdgpu_ring_alloc(ring, ndw);
  		if (vega20_xgmi_wa)

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux