From: Jesse Zhang <jesse.zhang@xxxxxxx> [ 3810.410040] UBSAN: shift-out-of-bounds in drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_int_process_v10.c:345:5 [ 3810.410044] shift exponent 4294967295 is too large for 64-bit type 'long long unsigned int' [ 3810.410047] CPU: 6 PID: 331 Comm: kworker/6:1H Not tainted 6.5.0+ #508 [ 3810.410050] Hardware name: AMD Splinter/Splinter-GNR, BIOS WS54117N_140 01/16/2024 [ 3810.410052] Workqueue: KFD IH interrupt_wq [amdgpu] [ 3810.410273] Call Trace: [ 3810.410274] <TASK> [ 3810.410277] dump_stack_lvl+0x4c/0x70 [ 3810.410283] dump_stack+0x14/0x20 [ 3810.410285] ubsan_epilogue+0x9/0x40 [ 3810.410290] __ubsan_handle_shift_out_of_bounds+0x113/0x170 [ 3810.410292] ? ZSTD_decompressSequencesSplitLitBuffer_default.isra.0+0x1389/0x1b50 [ 3810.410296] event_interrupt_wq_v10.cold+0x16/0x1e [amdgpu] [ 3810.410523] ? raw_spin_rq_unlock+0x14/0x40 [ 3810.410526] ? finish_task_switch+0x85/0x2b0 [ 3810.410528] interrupt_wq+0xb2/0x120 [amdgpu] [ 3810.410692] ? interrupt_wq+0xb2/0x120 [amdgpu] [ 3810.410806] process_one_work+0x229/0x430 [ 3810.410810] worker_thread+0x4e/0x3c0 [ 3810.410811] ? __pfx_worker_thread+0x10/0x10 [ 3810.410813] kthread+0xfb/0x130 [ 3810.410815] ? __pfx_kthread+0x10/0x10 [ 3810.410816] ret_from_fork+0x3d/0x60 [ 3810.410819] ? __pfx_kthread+0x10/0x10 [ 3810.410820] ret_from_fork_asm+0x1b/0x30 [ 3810.410823] </TASK> -v2: define a macro. 
KFD process interrupts v9, v10, v11 can use that check prior to mask conversion and user space may find it useful as well. (Jon) Signed-off-by: Jesse Zhang <Jesse.Zhang@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 6 +++++- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 3 +++ include/uapi/linux/kfd_ioctl.h | 6 ++++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 9a06c6fb6605..110ec5f71056 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -340,6 +340,9 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, } kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + /* filter out the invalid context_id0 */ + if (KFD_DBG_EC_RANGE_CHECK(context_id0)) + return; kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 7e2859736a55..c28cafa4b902 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -328,11 +328,15 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + /* filter out the invalid context_id0 */ + if (KFD_DBG_EC_RANGE_CHECK(context_id0)) + return; kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_CTXID0_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), NULL, 0); + } /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) diff 
--git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 91dd5e045b51..89dbefbd3081 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -389,6 +389,9 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, } kfd_signal_event_interrupt(pasid, sq_int_data, 24); } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + /* filter out the invalid context_id0 */ + if (KFD_DBG_EC_RANGE_CHECK(context_id0)) + return; kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 9ce46edc62a5..9cd3aa83aac3 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -922,6 +922,12 @@ enum kfd_dbg_trap_exception_code { #define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) +/* Check for exception code range for KFD search */ +#define KFD_DEBUG_ECODE_SHIFT 10 +#define KFD_DBG_EC_RANGE_CHECK(ecode) \ + (!(ecode >> KFD_DEBUG_ECODE_SHIFT) || \ + ((ecode >> KFD_DEBUG_ECODE_SHIFT) \ + > EC_MAX)) /* Runtime enable states */ enum kfd_dbg_runtime_state { -- 2.25.1