From: Jonathan Kim <jonathan.kim@xxxxxxx> GFX12 debugging requires setting up precise ALU operation for catching ALU exceptions. Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> Tested-by: Lancelot Six <lancelot.six@xxxxxxx> Reviewed-by: Eric Huang <jinhuieric.huang@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 15 +++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 ++++ include/uapi/linux/kfd_ioctl.h | 1 + include/uapi/linux/kfd_sysfs.h | 19 ++++++++++--------- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index d889e3545120a..45b1975b149a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -25,6 +25,7 @@ #include "kfd_topology.h" #include <linux/file.h> #include <uapi/linux/kfd_ioctl.h> +#include <uapi/linux/kfd_sysfs.h> #define MAX_WATCH_ADDRESSES 4 @@ -497,14 +498,24 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) int i, r = 0, rewind_count = 0; for (i = 0; i < target->n_pdds; i++) { - if (!kfd_dbg_is_per_vmid_supported(target->pdds[i]->dev) && + struct kfd_topology_device *topo_dev = + kfd_topology_device_by_id(target->pdds[i]->dev->id); + uint32_t caps = topo_dev->node_props.capability; + + if (!(caps | HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) && (*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) { *flags = prev_flags; return -EACCES; } + + if (!(caps | HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) && + (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) { + *flags = prev_flags; + return -EACCES; + } } - target->dbg_flags = *flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP; + target->dbg_flags = *flags; *flags = prev_flags; for (i = 0; i < target->n_pdds; i++) { struct kfd_process_device *pdd = target->pdds[i]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c51f131eaa2fb..11857869afb9e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1927,6 +1927,10 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; + + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) + dev->node_props.capability |= + HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED; } kfd_topology_set_dbg_firmware_support(dev); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d09c4a18e5713..43fb0f4c42262 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -853,6 +853,7 @@ enum kfd_dbg_trap_address_watch_mode { /* Additional wave settings */ enum kfd_dbg_trap_flags { KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1, + KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP = 2, }; /* Trap exceptions */ diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index a51b7331e0b4b..5e8d28617efad 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -51,15 +51,16 @@ /* Old buggy user mode depends on this being 0 */ #define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 -#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 -#define HSA_CAP_RASEVENTNOTIFY 0x00200000 -#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 -#define HSA_CAP_ASIC_REVISION_SHIFT 22 -#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 -#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 -#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 -#define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000 -#define HSA_CAP_RESERVED 0xe00f8000 +#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 +#define HSA_CAP_RASEVENTNOTIFY 0x00200000 +#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 +#define HSA_CAP_ASIC_REVISION_SHIFT 22 +#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 +#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 +#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 +#define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000 +#define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED 0x40000000 +#define HSA_CAP_RESERVED 0x800f8000 /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f -- 2.44.0