Re: [PATCH v4] drm/amdgpu: Don't flush/invalidate HDP for APUs and A+A

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 02.06.21 um 21:18 schrieb Eric Huang:
Integrate two generic functions to determine if HDP
flush is needed for all Asics.

Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx>

Nice work, just one more idea below.

But patch is Reviewed-by: Christian König <christian.koenig@xxxxxxx> either way if you implement that or not.

---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 ++++++++++++++++++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 15 ++--------
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c    |  6 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c |  2 +-
  7 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7533c2677933..2d5dac573425 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1348,6 +1348,11 @@ int amdgpu_device_baco_enter(struct drm_device *dev);
  int amdgpu_device_baco_exit(struct drm_device *dev);
  bool amdgpu_device_is_headless(struct amdgpu_device *adev);
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+		struct amdgpu_ring *ring);
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+		struct amdgpu_ring *ring);
+
  /* atpx handler */
  #if defined(CONFIG_VGA_SWITCHEROO)
  void amdgpu_register_atpx_handler(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4c61a67d0016..900c2dbce934 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -697,7 +697,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
void amdgpu_amdkfd_debug_mem_fence(struct kgd_dev *kgd)
  {
-	amdgpu_asic_flush_hdp((struct amdgpu_device *) kgd, NULL);
+	amdgpu_device_flush_hdp((struct amdgpu_device *) kgd, NULL);
  }
int amdgpu_amdkfd_send_close_event_drain_irq(struct kgd_dev *kgd,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9c4d33f649f7..7f687ea58834 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -362,9 +362,9 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
  		if (write) {
  			memcpy_toio(addr, buf, count);
  			mb();
-			amdgpu_asic_flush_hdp(adev, NULL);
+			amdgpu_device_flush_hdp(adev, NULL);
  		} else {
-			amdgpu_asic_invalidate_hdp(adev, NULL);
+			amdgpu_device_invalidate_hdp(adev, NULL);
  			mb();
  			memcpy_fromio(buf, addr, count);
  		}
@@ -5548,3 +5548,32 @@ bool amdgpu_device_is_headless(struct amdgpu_device *adev)
      /* Check if it is NV's VGA class while VCN is harvest, it is headless*/
      return nv_is_headless_sku(adev->pdev);
  }
+
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+		struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+	if (adev->flags & AMD_IS_APU)
+		return;
+#endif
+	if (adev->gmc.xgmi.connected_to_cpu)
+		return;
+
+	if (ring && ring->funcs->emit_hdp_flush)
+		amdgpu_ring_emit_hdp_flush(ring);
+	else
+		amdgpu_asic_flush_hdp(adev, ring);
+}
+
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+		struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+	if (adev->flags & AMD_IS_APU)
+		return;
+#endif
+	if (adev->gmc.xgmi.connected_to_cpu)
+		return;
+
+	amdgpu_asic_invalidate_hdp(adev, ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..0e3a46ff38e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -250,7 +250,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
  		}
  	}
  	mb();
-	amdgpu_asic_flush_hdp(adev, NULL);
+	amdgpu_device_flush_hdp(adev, NULL);

It might make sense to move the memory barrier into the amdgpu_device_flush_hdp() function as well, but I'm not 100% sure of that.

Christian.

  	for (i = 0; i < adev->num_vmhubs; i++)
  		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
@@ -328,7 +328,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
  		return r;
mb();
-	amdgpu_asic_flush_hdp(adev, NULL);
+	amdgpu_device_flush_hdp(adev, NULL);
  	for (i = 0; i < adev->num_vmhubs; i++)
  		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
  	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index aaa2574ce9bc..b0ba8376dc33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -222,15 +222,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
  	if (job && ring->funcs->init_cond_exec)
  		patch_offset = amdgpu_ring_init_cond_exec(ring);
-#ifdef CONFIG_X86_64
-	if (!(adev->flags & AMD_IS_APU))
-#endif
-	{
-		if (ring->funcs->emit_hdp_flush)
-			amdgpu_ring_emit_hdp_flush(ring);
-		else
-			amdgpu_asic_flush_hdp(adev, ring);
-	}
+	amdgpu_device_flush_hdp(adev, ring);
if (need_ctx_switch)
  		status |= AMDGPU_HAVE_CTX_SWITCH;
@@ -267,10 +259,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
  	if (job && ring->funcs->emit_frame_cntl)
  		amdgpu_ring_emit_frame_cntl(ring, false, secure);
-#ifdef CONFIG_X86_64
-	if (!(adev->flags & AMD_IS_APU))
-#endif
-		amdgpu_asic_invalidate_hdp(adev, ring);
+	amdgpu_device_invalidate_hdp(adev, ring);
if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
  		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 55378c6b9722..f21603a9d07b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -277,7 +277,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
  		return ret;
  	}
- amdgpu_asic_invalidate_hdp(psp->adev, NULL);
+	amdgpu_device_invalidate_hdp(psp->adev, NULL);
  	while (*((unsigned int *)psp->fence_buf) != index) {
  		if (--timeout == 0)
  			break;
@@ -290,7 +290,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
  		if (ras_intr)
  			break;
  		usleep_range(10, 100);
-		amdgpu_asic_invalidate_hdp(psp->adev, NULL);
+		amdgpu_device_invalidate_hdp(psp->adev, NULL);
  	}
/* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */
@@ -2696,7 +2696,7 @@ int psp_ring_cmd_submit(struct psp_context *psp,
  	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
  	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
  	write_frame->fence_value = index;
-	amdgpu_asic_flush_hdp(adev, NULL);
+	amdgpu_device_flush_hdp(adev, NULL);
/* Update the write Pointer in DWORDs */
  	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 03a44be50dd7..e3fbf0f10add 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -110,7 +110,7 @@ static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
  {
  	/* Flush HDP */
  	mb();
-	amdgpu_asic_flush_hdp(p->adev, NULL);
+	amdgpu_device_flush_hdp(p->adev, NULL);
  	return 0;
  }

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux