Re: [PATCH 34/34] drm/amdkfd: optimize gfx off enable toggle for debugging

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2023-03-27 14:43, Jonathan Kim wrote:
Legacy debug devices limited to pinning a single debug VMID for debugging
are the only devices that require disabling GFX OFF while accessing
debug registers.  Debug devices that support multi-process debugging
rely on the hardware scheduler to update debug registers and do not run
into GFX OFF access issues.

The address watch functions still touch the address registers directly. I guess that needs GFXOFF to be disabled.

Regards,
  Felix



Remove KFD GFX OFF enable toggle clutter by moving these calls into the
KGD debug calls themselves.

Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 ++++
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 33 ++++++++++++++++++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++++++++++++++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 30 +++++------------
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 21 +-----------
  5 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 6df215aba4c4..dec4a3381ccb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -350,6 +350,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
  				bool restore_dbg_registers,
  				uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -362,6 +364,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -375,6 +379,7 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
  					bool keep_trap_enabled,
  					uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->grbm_idx_mutex); @@ -388,6 +393,8 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
  const struct kfd2kgd_calls arcturus_kfd2kgd = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 444f9ea758d6..2132376e2107 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -753,12 +753,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
  				bool restore_dbg_registers,
  				uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->grbm_idx_mutex); kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); - /* assume gfx off is disabled for the debug session if rlc restore not supported. */
+	/* keep gfx off disabled for the debug session if rlc restore not supported. */
  	if (restore_dbg_registers) {
  		uint32_t data = 0;
@@ -783,6 +784,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -790,6 +793,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
  					bool keep_trap_enabled,
  					uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -800,6 +805,16 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
+	/*
+	 * Remove the extra gfx off disable reference from debug restore call
+	 * for asics that do not support rlc restore for debug registers.
+	 */
+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
+	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1))
+		amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -831,6 +846,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
  {
  	uint32_t data, wave_cntl_prev;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -852,6 +869,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -862,6 +881,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
  	uint32_t data = 0;
  	bool is_mode_set = !!wave_launch_mode;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -876,6 +897,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -933,10 +956,14 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
  			VALID,
  			1);
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -947,10 +974,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, watch_address_cntl = 0; + amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 87eef794d299..4b025fa0beb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -701,6 +701,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
  				bool restore_dbg_registers,
  				uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -711,6 +713,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -724,6 +728,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
  					bool keep_trap_enabled,
  					uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -734,6 +740,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -765,6 +773,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
  {
  	uint32_t data, wave_cntl_prev;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -786,6 +796,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -796,6 +808,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
  	uint32_t data = 0;
  	bool is_mode_set = !!wave_launch_mode;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -810,6 +824,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -867,10 +883,14 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
  			VALID,
  			1);
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -881,10 +901,14 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, watch_address_cntl = 0; + amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ead5afe4216b..131b9c25e3ec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2752,7 +2752,6 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
  			struct kfd_process_device *pdd = p->pdds[i];
if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
-				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  				pdd->dev->kfd2kgd->enable_debug_trap(
  						pdd->dev->adev,
  						true,
@@ -2809,33 +2808,22 @@ static int runtime_disable(struct kfd_process *p)
  			return ret;
  	}
- if (was_enabled && p->runtime_info.ttmp_setup) {
-		for (i = 0; i < p->n_pdds; i++) {
-			struct kfd_process_device *pdd = p->pdds[i];
-
-			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
-				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-		}
-	}
-
  	p->runtime_info.ttmp_setup = false;
/* disable ttmp setup */
  	for (i = 0; i < p->n_pdds; i++) {
  		struct kfd_process_device *pdd = p->pdds[i];
- if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
-			pdd->spi_dbg_override =
-					pdd->dev->kfd2kgd->disable_debug_trap(
-					pdd->dev->adev,
-					false,
-					pdd->dev->vm_info.last_vmid_kfd);
+		pdd->spi_dbg_override =
+				pdd->dev->kfd2kgd->disable_debug_trap(
+				pdd->dev->adev,
+				false,
+				pdd->dev->vm_info.last_vmid_kfd);
- if (!pdd->dev->shared_resources.enable_mes)
-				debug_refresh_runlist(pdd->dev->dqm);
-			else
-				kfd_dbg_set_mes_debug_mode(pdd);
-		}
+		if (!pdd->dev->shared_resources.enable_mes)
+			debug_refresh_runlist(pdd->dev->dqm);
+		else
+			kfd_dbg_set_mes_debug_mode(pdd);
  	}
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 0ea85afcffd3..6e306defcc85 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -423,11 +423,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
  			return r;
  	}
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
  							pdd->dev->adev,
  							watch_id);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->shared_resources.enable_mes)
  		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -458,7 +456,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
  		}
  	}
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  	pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
  				pdd->dev->adev,
  				watch_address,
@@ -466,7 +463,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
  				*watch_id,
  				watch_mode,
  				pdd->dev->vm_info.last_vmid_kfd);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->shared_resources.enable_mes)
  		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -577,15 +573,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
kfd_process_set_trap_debug_flag(&pdd->qpd, false); - /* GFX off is already disabled by debug activate if not RLC restore supported. */
-		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		pdd->spi_dbg_override =
  				pdd->dev->kfd2kgd->disable_debug_trap(
  				pdd->dev->adev,
  				target->runtime_info.ttmp_setup,
  				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
  				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
@@ -684,14 +676,10 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
  			}
  		}
- /* Disable GFX OFF to prevent garbage read/writes to debug registers.
+		/*
  		 * If RLC restore of debug registers is not supported and runtime enable
  		 * hasn't done so already on ttmp setup request, restore the trap config registers.
-		 *
-		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
-		 * the debug session.
  		 */
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
  						target->runtime_info.ttmp_setup))
  			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
@@ -702,9 +690,6 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
  					false,
  					pdd->dev->vm_info.last_vmid_kfd);
- if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-
  		/**
  		 * Setting the debug flag in the trap handler requires that the TMA has been
  		 * allocated, which occurs during CWSR initialization.
@@ -836,7 +821,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
  	for (i = 0; i < target->n_pdds; i++) {
  		struct kfd_process_device *pdd = target->pdds[i];
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
  				pdd->dev->adev,
  				pdd->dev->vm_info.last_vmid_kfd,
@@ -845,7 +829,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
  				trap_mask_request,
  				trap_mask_prev,
  				pdd->spi_dbg_override);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->shared_resources.enable_mes)
  			r = debug_refresh_runlist(pdd->dev->dqm);
@@ -872,12 +855,10 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
  	for (i = 0; i < target->n_pdds; i++) {
  		struct kfd_process_device *pdd = target->pdds[i];
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
  				pdd->dev->adev,
  				wave_launch_mode,
  				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->shared_resources.enable_mes)
  			r = debug_refresh_runlist(pdd->dev->dqm);



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux