[Public] + Felix (typo on email) > -----Original Message----- > From: Kim, Jonathan <Jonathan.Kim@xxxxxxx> > Sent: Wednesday, June 7, 2023 1:32 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Felix.Kueling@xxxxxxx; Huang, JinHuiEric <JinHuiEric.Huang@xxxxxxx>; > Kim, Jonathan <Jonathan.Kim@xxxxxxx> > Subject: [PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging > > Legacy debug devices limited to pinning a single debug VMID for debugging > are the only devices that require disabling GFX OFF while accessing > debug registers. Debug devices that support multi-process debugging > rely on the hardware scheduler to update debug registers and do not run > into GFX OFF access issues. > > Remove KFD GFX OFF enable toggle clutter by moving these calls into the > KGD debug calls themselves. > > v2: toggle gfx off around address watch hi/lo settings as well. > > Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> > --- > .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 4 +++ > .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 7 ++++ > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 33 > ++++++++++++++++++- > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 4 +++ > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++++++++++++++ > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 22 +++---------- > drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 21 +----------- > 7 files changed, 77 insertions(+), 38 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > index 60f9e027fb66..1f0e6ec56618 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > @@ -150,6 +150,8 @@ static uint32_t > kgd_gfx_aldebaran_set_address_watch( > VALID, > 1); > > + amdgpu_gfx_off_ctrl(adev, false); > + > WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) > + > (watch_id * TCP_WATCH_STRIDE)), > watch_address_high); > @@ -158,6 +160,8 @@ static uint32_t > kgd_gfx_aldebaran_set_address_watch( > (watch_id * TCP_WATCH_STRIDE)), > watch_address_low); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return watch_address_cntl; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > index 625db444df1c..a4e28d547173 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > @@ -350,6 +350,8 @@ static uint32_t > kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev, > bool restore_dbg_registers, > uint32_t vmid) > { > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); > @@ -362,6 +364,8 @@ static uint32_t > kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -375,6 +379,7 @@ static uint32_t > kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev, > bool keep_trap_enabled, > uint32_t vmid) > { > + amdgpu_gfx_off_ctrl(adev, false); > > mutex_lock(&adev->grbm_idx_mutex); > > @@ -388,6 +393,8 @@ static uint32_t > kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > const struct kfd2kgd_calls arcturus_kfd2kgd = { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > index 8ad7a7779e14..415928139861 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > @@ -754,12 +754,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct > amdgpu_device *adev, > bool restore_dbg_registers, > uint32_t vmid) > { > + amdgpu_gfx_off_ctrl(adev, false); > > mutex_lock(&adev->grbm_idx_mutex); > > kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); > > - /* assume gfx off is disabled for the debug session if rlc restore not > supported. */ > + /* keep gfx off disabled for the debug session if rlc restore not > supported. */ > if (restore_dbg_registers) { > uint32_t data = 0; > > @@ -784,6 +785,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct > amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -791,6 +794,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct > amdgpu_device *adev, > bool keep_trap_enabled, > uint32_t vmid) > { > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); > @@ -801,6 +806,16 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct > amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > + /* > + * Remove the extra gfx off disable reference from debug restore call > + * for asics that do not support rlc restore for debug registers. > + */ > + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) || > + adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -832,6 +847,8 @@ uint32_t > kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev, > { > uint32_t data, wave_cntl_prev; > > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, > mmSPI_GDBG_WAVE_CNTL)); > @@ -853,6 +870,8 @@ uint32_t > kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -863,6 +882,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct > amdgpu_device *adev, > uint32_t data = 0; > bool is_mode_set = !!wave_launch_mode; > > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); > @@ -877,6 +898,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct > amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -916,6 +939,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct > amdgpu_device *adev, > VALID, > 0); > > + amdgpu_gfx_off_ctrl(adev, false); > + > WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + > (watch_id * TCP_WATCH_STRIDE)), > watch_address_cntl); > @@ -938,6 +963,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct > amdgpu_device *adev, > (watch_id * TCP_WATCH_STRIDE)), > watch_address_cntl); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -948,10 +975,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct > amdgpu_device *adev, > > watch_address_cntl = 0; > > + amdgpu_gfx_off_ctrl(adev, false); > + > WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + > (watch_id * TCP_WATCH_STRIDE)), > watch_address_cntl); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c > index 91c3574ebed3..bb6ad733b3d6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c > @@ -768,6 +768,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct > amdgpu_device *adev, > VALID, > 1); > > + amdgpu_gfx_off_ctrl(adev, false); > + > WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) > + > (watch_id * TCP_WATCH_STRIDE)), > watch_address_high); > @@ -776,6 +778,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct > amdgpu_device *adev, > (watch_id * TCP_WATCH_STRIDE)), > watch_address_low); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return watch_address_cntl; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index 51d93fb13ea3..e30d1f9f5564 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -704,6 +704,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct > amdgpu_device *adev, > bool restore_dbg_registers, > uint32_t vmid) > { > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); > @@ -714,6 +716,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct > amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -727,6 +731,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct > amdgpu_device *adev, > bool keep_trap_enabled, > uint32_t vmid) > { > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); > @@ -737,6 +743,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct > amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -768,6 +776,8 @@ uint32_t > kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev, > { > uint32_t data, wave_cntl_prev; > > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, > mmSPI_GDBG_WAVE_CNTL)); > @@ -789,6 +799,8 @@ uint32_t > kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -799,6 +811,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct > amdgpu_device *adev, > uint32_t data = 0; > bool is_mode_set = !!wave_launch_mode; > > + amdgpu_gfx_off_ctrl(adev, false); > + > mutex_lock(&adev->grbm_idx_mutex); > > kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); > @@ -813,6 +827,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct > amdgpu_device *adev, > > mutex_unlock(&adev->grbm_idx_mutex); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -852,6 +868,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct > amdgpu_device *adev, > VALID, > 0); > > + amdgpu_gfx_off_ctrl(adev, false); > + > WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + > (watch_id * TCP_WATCH_STRIDE)), > watch_address_cntl); > @@ -874,6 +892,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct > amdgpu_device *adev, > (watch_id * TCP_WATCH_STRIDE)), > watch_address_cntl); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > @@ -884,10 +904,14 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct > amdgpu_device *adev, > > watch_address_cntl = 0; > > + amdgpu_gfx_off_ctrl(adev, false); > + > WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + > (watch_id * TCP_WATCH_STRIDE)), > watch_address_cntl); > > + amdgpu_gfx_off_ctrl(adev, true); > + > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index cf1db0ab3471..f597e1c8ebee 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -2766,7 +2766,6 @@ static int runtime_enable(struct kfd_process *p, > uint64_t r_debug, > struct kfd_process_device *pdd = p->pdds[i]; > > if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) { > - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); > pdd->dev->kfd2kgd->enable_debug_trap( > pdd->dev->adev, > true, > @@ -2823,33 +2822,22 @@ static int runtime_disable(struct kfd_process *p) > return ret; > } > > - if (was_enabled && p->runtime_info.ttmp_setup) { > - for (i = 0; i < p->n_pdds; i++) { > - struct kfd_process_device *pdd = p->pdds[i]; > - > - if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) > - amdgpu_gfx_off_ctrl(pdd->dev->adev, true); > - } > - } > - > p->runtime_info.ttmp_setup = false; > > /* disable ttmp setup */ > for (i = 0; i < p->n_pdds; i++) { > struct kfd_process_device *pdd = p->pdds[i]; > > - if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { > - pdd->spi_dbg_override = > - pdd->dev->kfd2kgd- > >disable_debug_trap( > - pdd->dev->adev, > - false, > - pdd->dev->vm_info.last_vmid_kfd); > + pdd->spi_dbg_override = > + pdd->dev->kfd2kgd->disable_debug_trap( > + pdd->dev->adev, > + false, > + pdd->dev->vm_info.last_vmid_kfd); > > if (!pdd->dev->kfd->shared_resources.enable_mes) > debug_refresh_runlist(pdd->dev->dqm); > else > kfd_dbg_set_mes_debug_mode(pdd); > - } > } > > return 0; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > index e7bc07068eed..5b381018407a 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > @@ -441,11 +441,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct > kfd_process_device *pdd, > return r; > } > > - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); > pdd->watch_points[watch_id] = pdd->dev->kfd2kgd- > >clear_address_watch( > pdd->dev->adev, > watch_id); > - amdgpu_gfx_off_ctrl(pdd->dev->adev, true); > > if (!pdd->dev->kfd->shared_resources.enable_mes) > r = debug_map_and_unlock(pdd->dev->dqm); > @@ -476,7 +474,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct > kfd_process_device *pdd, > } > } > > - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); > pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd- > >set_address_watch( > pdd->dev->adev, > watch_address, > @@ -484,7 +481,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct > kfd_process_device *pdd, > *watch_id, > watch_mode, > pdd->dev->vm_info.last_vmid_kfd); > - amdgpu_gfx_off_ctrl(pdd->dev->adev, true); > > if (!pdd->dev->kfd->shared_resources.enable_mes) > r = debug_map_and_unlock(pdd->dev->dqm); > @@ -599,15 +595,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process > *target, bool unwind, int unwind > > kfd_process_set_trap_debug_flag(&pdd->qpd, false); > > - /* GFX off is already disabled by debug activate if not RLC > restore supported. */ > - if (kfd_dbg_is_rlc_restore_supported(pdd->dev)) > - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); > pdd->spi_dbg_override = > pdd->dev->kfd2kgd->disable_debug_trap( > pdd->dev->adev, > target->runtime_info.ttmp_setup, > pdd->dev->vm_info.last_vmid_kfd); > - amdgpu_gfx_off_ctrl(pdd->dev->adev, true); > > if (!kfd_dbg_is_per_vmid_supported(pdd->dev) && > release_debug_trap_vmid(pdd->dev->dqm, > &pdd->qpd)) > @@ -699,14 +691,10 @@ int kfd_dbg_trap_activate(struct kfd_process > *target) > } > } > > - /* Disable GFX OFF to prevent garbage read/writes to debug > registers. > + /* > * If RLC restore of debug registers is not supported and > runtime enable > * hasn't done so already on ttmp setup request, restore the > trap config registers. > - * > - * If RLC restore of debug registers is not supported, keep gfx > off disabled for > - * the debug session. > */ > - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); > if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) || > target- > >runtime_info.ttmp_setup)) > pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev- > >adev, true, > @@ -717,9 +705,6 @@ int kfd_dbg_trap_activate(struct kfd_process *target) > false, > pdd->dev->vm_info.last_vmid_kfd); > > - if (kfd_dbg_is_rlc_restore_supported(pdd->dev)) > - amdgpu_gfx_off_ctrl(pdd->dev->adev, true); > - > /* > * Setting the debug flag in the trap handler requires that the > TMA has been > * allocated, which occurs during CWSR initialization. > @@ -851,7 +836,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct > kfd_process *target, > for (i = 0; i < target->n_pdds; i++) { > struct kfd_process_device *pdd = target->pdds[i]; > > - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); > pdd->spi_dbg_override = pdd->dev->kfd2kgd- > >set_wave_launch_trap_override( > pdd->dev->adev, > pdd->dev->vm_info.last_vmid_kfd, > @@ -860,7 +844,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct > kfd_process *target, > trap_mask_request, > trap_mask_prev, > pdd->spi_dbg_override); > - amdgpu_gfx_off_ctrl(pdd->dev->adev, true); > > if (!pdd->dev->kfd->shared_resources.enable_mes) > r = debug_refresh_runlist(pdd->dev->dqm); > @@ -887,12 +870,10 @@ int kfd_dbg_trap_set_wave_launch_mode(struct > kfd_process *target, > for (i = 0; i < target->n_pdds; i++) { > struct kfd_process_device *pdd = target->pdds[i]; > > - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); > pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd- > >set_wave_launch_mode( > pdd->dev->adev, > wave_launch_mode, > pdd->dev->vm_info.last_vmid_kfd); > - amdgpu_gfx_off_ctrl(pdd->dev->adev, true); > > if (!pdd->dev->kfd->shared_resources.enable_mes) > r = debug_refresh_runlist(pdd->dev->dqm); > -- > 2.25.1