Re: [PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2023-06-07 13:32, Jonathan Kim wrote:
Legacy debug devices limited to pinning a single debug VMID for debugging
are the only devices that require disabling GFX OFF while accessing
debug registers.  Debug devices that support multi-process debugging
rely on the hardware scheduler to update debug registers and do not run
into GFX OFF access issues.

Remove KFD GFX OFF enable toggle clutter by moving these calls into the
KGD debug calls themselves.

v2: toggle gfx off around address watch hi/lo settings as well.

Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  4 +++
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 ++++
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 33 ++++++++++++++++++-
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c    |  4 +++
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++++++++++++++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 22 +++----------
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 21 +-----------

Looks like you missed one amdgpu_amdkfd_gfx_off_ctrl call in kfd_process.c.


  7 files changed, 77 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 60f9e027fb66..1f0e6ec56618 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -150,6 +150,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
  			VALID,
  			1);
+ amdgpu_gfx_off_ctrl(adev, false);
+

Aldebaran doesn't use automatic gfxoff, so this should not be needed.


  	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_high);
@@ -158,6 +160,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_low);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return watch_address_cntl;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 625db444df1c..a4e28d547173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -350,6 +350,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
  				bool restore_dbg_registers,
  				uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+

I would need to double check, but I believe Arcturus also doesn't support gfxoff.


  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -362,6 +364,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -375,6 +379,7 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
  					bool keep_trap_enabled,
  					uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->grbm_idx_mutex); @@ -388,6 +393,8 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
  const struct kfd2kgd_calls arcturus_kfd2kgd = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 8ad7a7779e14..415928139861 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -754,12 +754,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
  				bool restore_dbg_registers,
  				uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->grbm_idx_mutex); kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); - /* assume gfx off is disabled for the debug session if rlc restore not supported. */
+	/* keep gfx off disabled for the debug session if rlc restore not supported. */
  	if (restore_dbg_registers) {
  		uint32_t data = 0;
@@ -784,6 +785,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -791,6 +794,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
  					bool keep_trap_enabled,
  					uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -801,6 +806,16 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
+	/*
+	 * Remove the extra gfx off disable reference from debug restore call
+	 * for asics that do not support rlc restore for debug registers.
+	 */

Where is the extra GFXOFF disable added? I don't see it in kgd_gfx_v10_enable_debug_trap above.


+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
+	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1))

This duplicates kfd_dbg_is_rlc_restore_supported. It makes me wonder whether kfd_dbg_is_rlc_restore_supported is still needed at all. See below.


+		amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -832,6 +847,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
  {
  	uint32_t data, wave_cntl_prev;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -853,6 +870,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -863,6 +882,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
  	uint32_t data = 0;
  	bool is_mode_set = !!wave_launch_mode;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -877,6 +898,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -916,6 +939,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
  			VALID,
  			0);
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
@@ -938,6 +963,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -948,10 +975,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, watch_address_cntl = 0; + amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index 91c3574ebed3..bb6ad733b3d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -768,6 +768,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
  			VALID,
  			1);
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_high);
@@ -776,6 +778,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_low);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return watch_address_cntl;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51d93fb13ea3..e30d1f9f5564 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -704,6 +704,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
  				bool restore_dbg_registers,
  				uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -714,6 +716,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -727,6 +731,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
  					bool keep_trap_enabled,
  					uint32_t vmid)
  {
+	amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -737,6 +743,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -768,6 +776,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
  {
  	uint32_t data, wave_cntl_prev;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -789,6 +799,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -799,6 +811,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
  	uint32_t data = 0;
  	bool is_mode_set = !!wave_launch_mode;
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	mutex_lock(&adev->grbm_idx_mutex);
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -813,6 +827,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); + amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -852,6 +868,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
  			VALID,
  			0);
+ amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
@@ -874,6 +892,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
@@ -884,10 +904,14 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, watch_address_cntl = 0; + amdgpu_gfx_off_ctrl(adev, false);
+
  	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
  			(watch_id * TCP_WATCH_STRIDE)),
  			watch_address_cntl);
+ amdgpu_gfx_off_ctrl(adev, true);
+
  	return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index cf1db0ab3471..f597e1c8ebee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2766,7 +2766,6 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
  			struct kfd_process_device *pdd = p->pdds[i];
if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {

Is this condition still needed? Or could this be handled inside kfd2kgd->enable_debug_trap?


-				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  				pdd->dev->kfd2kgd->enable_debug_trap(
  						pdd->dev->adev,
  						true,
@@ -2823,33 +2822,22 @@ static int runtime_disable(struct kfd_process *p)
  			return ret;
  	}
- if (was_enabled && p->runtime_info.ttmp_setup) {
-		for (i = 0; i < p->n_pdds; i++) {
-			struct kfd_process_device *pdd = p->pdds[i];
-
-			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
-				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-		}
-	}
-
  	p->runtime_info.ttmp_setup = false;
/* disable ttmp setup */
  	for (i = 0; i < p->n_pdds; i++) {
  		struct kfd_process_device *pdd = p->pdds[i];
- if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
-			pdd->spi_dbg_override =
-					pdd->dev->kfd2kgd->disable_debug_trap(
-					pdd->dev->adev,
-					false,
-					pdd->dev->vm_info.last_vmid_kfd);
+		pdd->spi_dbg_override =
+				pdd->dev->kfd2kgd->disable_debug_trap(
+				pdd->dev->adev,
+				false,
+				pdd->dev->vm_info.last_vmid_kfd);

Are you sure this is related to gfxoff? Is it safe to make these calls (and refresh_runlist below) on GPUs that do single-process debugging?


if (!pdd->dev->kfd->shared_resources.enable_mes)
  				debug_refresh_runlist(pdd->dev->dqm);
  			else
  				kfd_dbg_set_mes_debug_mode(pdd);

Assuming the above change is intentional, indentation is incorrect here.


-		}
  	}
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index e7bc07068eed..5b381018407a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -441,11 +441,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
  			return r;
  	}
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
  							pdd->dev->adev,
  							watch_id);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->kfd->shared_resources.enable_mes)
  		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -476,7 +474,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
  		}
  	}
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  	pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
  				pdd->dev->adev,
  				watch_address,
@@ -484,7 +481,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
  				*watch_id,
  				watch_mode,
  				pdd->dev->vm_info.last_vmid_kfd);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->kfd->shared_resources.enable_mes)
  		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -599,15 +595,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
kfd_process_set_trap_debug_flag(&pdd->qpd, false); - /* GFX off is already disabled by debug activate if not RLC restore supported. */
-		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		pdd->spi_dbg_override =
  				pdd->dev->kfd2kgd->disable_debug_trap(
  				pdd->dev->adev,
  				target->runtime_info.ttmp_setup,
  				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
  				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
@@ -699,14 +691,10 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
  			}
  		}
- /* Disable GFX OFF to prevent garbage read/writes to debug registers.
+		/*
  		 * If RLC restore of debug registers is not supported and runtime enable
  		 * hasn't done so already on ttmp setup request, restore the trap config registers.
-		 *
-		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
-		 * the debug session.
  		 */
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||

Is this condition still needed? Or could this be handled inside kfd2kgd->enable_debug_trap?

Regards,
  Felix


  						target->runtime_info.ttmp_setup))
  			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
@@ -717,9 +705,6 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
  					false,
  					pdd->dev->vm_info.last_vmid_kfd);
- if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-
  		/*
  		 * Setting the debug flag in the trap handler requires that the TMA has been
  		 * allocated, which occurs during CWSR initialization.
@@ -851,7 +836,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
  	for (i = 0; i < target->n_pdds; i++) {
  		struct kfd_process_device *pdd = target->pdds[i];
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
  				pdd->dev->adev,
  				pdd->dev->vm_info.last_vmid_kfd,
@@ -860,7 +844,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
  				trap_mask_request,
  				trap_mask_prev,
  				pdd->spi_dbg_override);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->kfd->shared_resources.enable_mes)
  			r = debug_refresh_runlist(pdd->dev->dqm);
@@ -887,12 +870,10 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
  	for (i = 0; i < target->n_pdds; i++) {
  		struct kfd_process_device *pdd = target->pdds[i];
- amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
  		pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
  				pdd->dev->adev,
  				wave_launch_mode,
  				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->kfd->shared_resources.enable_mes)
  			r = debug_refresh_runlist(pdd->dev->dqm);



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux