Allow the debugger to a single query queue, device and process exception in a FIFO manner. The KFD should also return the GPU or Queue id of the exception. The debugger also has the option of clearing exceptions after being queried. Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++ drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 64 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 5 ++ 3 files changed, 75 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index bec52bbba330..042cb0f6426b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2915,6 +2915,12 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags); break; case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: + r = kfd_dbg_ev_query_debug_event(target, + &args->query_debug_event.queue_id, + &args->query_debug_event.gpu_id, + args->query_debug_event.exception_mask, + &args->query_debug_event.exception_mask); + break; case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 0219032b9ce1..3b15159421cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -33,6 +33,70 @@ #define MAX_WATCH_ADDRESSES 4 static DEFINE_SPINLOCK(watch_points_lock); +int kfd_dbg_ev_query_debug_event(struct kfd_process *process, + unsigned int *queue_id, + unsigned int *gpu_id, + uint64_t exception_clear_mask, + uint64_t *event_status) +{ + struct process_queue_manager *pqm; + struct process_queue_node *pqn; + int i; + + if (!(process && process->debug_trap_enabled)) + return -ENODATA; + + mutex_lock(&process->event_mutex); + *event_status = 0; + *queue_id = 0; + *gpu_id = 0; + + /* find and report queue events */ + pqm = &process->pqm; + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { + uint64_t tmp = process->exception_enable_mask; + + if (!pqn->q) + continue; + + tmp &= pqn->q->properties.exception_status; + + if (!tmp) + continue; + + *event_status = pqn->q->properties.exception_status; + *queue_id = pqn->q->properties.queue_id; + *gpu_id = pqn->q->device->id; + pqn->q->properties.exception_status &= ~exception_clear_mask; + goto out; + } + + /* find and report device events */ + for (i = 0; i < process->n_pdds; i++) { + struct kfd_process_device *pdd = process->pdds[i]; + uint64_t tmp = process->exception_enable_mask + & pdd->exception_status; + + if (!tmp) + continue; + + *event_status = pdd->exception_status; + *gpu_id = pdd->dev->id; + pdd->exception_status &= ~exception_clear_mask; + goto out; + } + + /* report process events */ + if (process->exception_enable_mask & process->exception_status) { + *event_status = process->exception_status; + process->exception_status &= ~exception_clear_mask; + } + +out: + mutex_unlock(&process->event_mutex); + return *event_status ? 0 : -EAGAIN; +} + void debug_event_write_work_handler(struct work_struct *work) { struct kfd_process *process; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 12b80b6c96d0..c64ffd3efc46 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -27,6 +27,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count); int kfd_dbg_trap_activate(struct kfd_process *target); +int kfd_dbg_ev_query_debug_event(struct kfd_process *process, + unsigned int *queue_id, + unsigned int *gpu_id, + uint64_t exception_clear_mask, + uint64_t *event_status); bool kfd_set_dbg_ev_from_interrupt(struct kfd_dev *dev, unsigned int pasid, uint32_t doorbell_id, -- 2.25.1