when enabling trap temporaries via SPI_GDBG_PER_VMID_CNTL.Trap_en.
This overhead is unacceptable for microbenchmark performance in normal
mode for certain customers.
ROCr allows the user to bypass trap temporary setup through the
HSA_ENABLE_DEBUG environment variable. As a result, the debugger has
to consider two scenarios.
In the first scenario, if the target's runtime enable has already
occurred before the debugger attaches, the debugger sets up the trap
temporaries on attach whether or not the runtime requested them. The
debugger can query the runtime status on attach.
In the second scenario, where the debugger spawns the target process,
it has to wait for ROCr's runtime enable request from the target. The
runtime enable request sees that its process has been debug attached,
so it enables the trap temporaries (it now knows it is in debug mode),
raises an EC_PROCESS_RUNTIME signal to the debugger and then waits for
the debugger's response. Once the debugger has received the runtime
signal, it wakes the target process.
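As a rough illustration of the runtime side of this handshake, the
sketch below shows how a runtime such as ROCr might issue the request.
This is a minimal sketch and not part of this patch: the
AMDKFD_IOC_RUNTIME_ENABLE ioctl number and the kfd_ioctl.h include are
assumed to come from the uapi additions earlier in this series, and
error handling is omitted.

  #include <stdbool.h>
  #include <stdint.h>
  #include <sys/ioctl.h>
  #include "kfd_ioctl.h"  /* uapi header providing the args and masks used below */

  /* Hypothetical helper: request runtime enable on an open /dev/kfd fd. */
  static int example_runtime_enable(int kfd_fd, uint64_t r_debug,
                                    bool want_ttmp_save)
  {
          struct kfd_ioctl_runtime_enable_args args = {0};

          args.r_debug = r_debug;  /* rendezvous info handed to the debugger */
          args.mode_mask = KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK;
          if (want_ttmp_save)
                  args.mode_mask |= KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK;

          /*
           * If the process is already debug attached, the KFD raises
           * EC_PROCESS_RUNTIME to the debugger and this call blocks on
           * runtime_enable_sema until the debugger responds (or the wait
           * is interrupted, in which case the runtime should retry).
           */
          return ioctl(kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
  }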
In addition, a restriction must be enforced between runtime enable and
HW debug mode setting: HW debug mode operations must only be permitted
once HW debug mode has actually been enabled, i.e. after trap
activation.
With single process debug devices, allowing the debugger to set debug
HW modes prior to trap activation means that debug HW mode setting can
occur before the KFD has reserved the debug VMID (0xf) from the hardware
scheduler's VMID allocation resource pool. This can result in the
hardware scheduler assigning VMID 0xf to a non-debugged process and
having that process inherit debug HW mode settings intended for the
debugged target process instead, which is both incorrect and potentially
fatal for normal mode operation.
With multi process debug devices, allowing the debugger to set debug
HW modes prior to trap activation means that non-debugged processes
migrating to a new VMID could inherit unintended debug settings.
All debug operations that touch HW settings must therefore require
trap activation, where trap activation is triggered by both debug
attach and runtime enable (the target has the KFD opened and is ready
to dispatch work).
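As a conceptual sketch only (this helper is hypothetical and is not
added by this patch), the activation condition described above reduces
to the following check over the kfd_process fields touched by this
series:

  /*
   * Hypothetical helper, for illustration only: HW-touching debug setup
   * is valid only once the debugger has attached and the target has
   * runtime enabled.
   */
  static bool example_trap_activation_ready(struct kfd_process *p)
  {
          return p->debug_trap_enabled &&
                 p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;
  }

This mirrors the -EPERM gate added to kfd_ioctl_set_debug_trap() in the
diff below.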
Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 144 ++++++++++++++++++++++-
drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 2 +
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
4 files changed, 148 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 4b4c4200d8fb..27cd5af72521 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2655,11 +2655,141 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
return ret;
}
-static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)
+static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
+ bool enable_ttmp_setup)
{
+ int i = 0, ret = 0;
+
+ if (p->is_runtime_retry)
+ goto retry;
+
+ if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+ return -EBUSY;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->qpd.queue_count)
+ return -EEXIST;
+ }
+
+ p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+ p->runtime_info.r_debug = r_debug;
+ p->runtime_info.ttmp_setup = enable_ttmp_setup;
+
+ if (p->runtime_info.ttmp_setup) {
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+ pdd->dev->kfd2kgd->enable_debug_trap(
+ pdd->dev->adev,
+ true,
+ pdd->dev->vm_info.last_vmid_kfd);
+ }
+
+ if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+ pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+ pdd->dev->adev,
+ false,
+ pdd->dev->vm_info.last_vmid_kfd);
+
+ debug_refresh_runlist(pdd->dev->dqm);
+ }
+ }
+ }
+
+retry:
+ if (p->debug_trap_enabled) {
+ if (!p->is_runtime_retry) {
+ kfd_dbg_trap_activate(p);
+ kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
+ p, NULL, 0, false, NULL, 0);
+ }
+
+ mutex_unlock(&p->mutex);
+ ret = down_interruptible(&p->runtime_enable_sema);
+ mutex_lock(&p->mutex);
+
+ p->is_runtime_retry = !!ret;
+ }
+
+ return ret;
+}
+
+static int runtime_disable(struct kfd_process *p)
+{
+ int i = 0, ret;
+ bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;
+
+ p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED;
+ p->runtime_info.r_debug = 0;
+
+ if (p->debug_trap_enabled) {
+ if (was_enabled)
+ kfd_dbg_trap_deactivate(p, false, 0);
+
+ if (!p->is_runtime_retry)
+ kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
+ p, NULL, 0, false, NULL, 0);
+
+ mutex_unlock(&p->mutex);
+ ret = down_interruptible(&p->runtime_enable_sema);
+ mutex_lock(&p->mutex);
+
+ p->is_runtime_retry = !!ret;
+ if (ret)
+ return ret;
+ }
+
+ if (was_enabled && p->runtime_info.ttmp_setup) {
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
+ amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+ }
+ }
+
+ p->runtime_info.ttmp_setup = false;
+
+ /* disable DISPATCH_PTR save */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+ pdd->spi_dbg_override =
+ pdd->dev->kfd2kgd->disable_debug_trap(
+ pdd->dev->adev,
+ false,
+ pdd->dev->vm_info.last_vmid_kfd);
+
+ debug_refresh_runlist(pdd->dev->dqm);
+ }
+ }
+
return 0;
}
+static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_runtime_enable_args *args = data;
+ int r;
+
+ mutex_lock(&p->mutex);
+
+ if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK)
+ r = runtime_enable(p, args->r_debug,
+ !!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK));
+ else
+ r = runtime_disable(p);
+
+ mutex_unlock(&p->mutex);
+
+ return r;
+}
+
static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data)
{
struct kfd_ioctl_dbg_trap_args *args = data;
@@ -2721,6 +2851,18 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
goto unlock_out;
}
+ if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED &&
+ (args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE ||
+ args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE ||
+ args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES ||
+ args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES ||
+ args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
+ args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH ||
+ args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) {
+ r = -EPERM;
+ goto unlock_out;
+ }
+
switch (args->op) {
case KFD_IOC_DBG_TRAP_ENABLE:
if (target != p)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 87a23b1d4d49..ae6e701a2656 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -176,7 +176,7 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
* to unwind
* else: ignored
*/
-static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
+void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
{
int i, count = 0;
@@ -238,7 +238,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
return 0;
}
-static int kfd_dbg_trap_activate(struct kfd_process *target)
+int kfd_dbg_trap_activate(struct kfd_process *target)
{
int i, r = 0, unwind_count = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 8aa52cc3af17..e31c9bb0e848 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -28,6 +28,8 @@
void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
uint32_t vmid,
bool stall);
+void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count);
+int kfd_dbg_trap_activate(struct kfd_process *target);
bool kfd_dbg_ev_raise(uint64_t event_mask,
struct kfd_process *process, struct kfd_dev *dev,
unsigned int source_id, bool use_worker,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b69f2f94a50e..9690a2adb9ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -943,6 +943,7 @@ struct kfd_process {
/* Tracks runtime enable status */
struct semaphore runtime_enable_sema;
+ bool is_runtime_retry;
struct kfd_runtime_info runtime_info;
};