From: Oak Zeng <Oak.Zeng@xxxxxxx> Add a module parameter, no_queue_eviction_on_vm_fault, that skips process queue eviction on a GPU VM fault. This keeps the wavefront context available for debugging purposes. Signed-off-by: Oak Zeng <Oak.Zeng@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++ drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1d7839dbbc33..c4d822b46ea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -748,6 +748,13 @@ bool no_system_mem_limit; module_param(no_system_mem_limit, bool, 0644); MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)"); +/** + * DOC: no_queue_eviction_on_vm_fault (int) + * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction). 
+ */ +int amdgpu_no_queue_eviction_on_vm_fault = 0; +MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); +module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif /** diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 8e64c01565ac..60f752d75833 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -80,8 +80,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || - ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || - ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT; + ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && + !amdgpu_no_queue_eviction_on_vm_fault); } static void cik_event_interrupt_wq(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 74a460be077b..1c20458f3962 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -98,9 +98,10 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, source_id == SOC15_INTSRC_SDMA_TRAP || source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || source_id == SOC15_INTSRC_CP_BAD_OPCODE || - client_id == SOC15_IH_CLIENTID_VMC || + ((client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || - client_id == SOC15_IH_CLIENTID_UTCL2; + client_id == SOC15_IH_CLIENTID_UTCL2) && + !amdgpu_no_queue_eviction_on_vm_fault); } static void event_interrupt_wq_v9(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index e2ebd5a1d4de..b9839c650f21 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -169,6 +169,11 @@ extern bool hws_gws_support; /* Queue preemption timeout in ms */ extern int queue_preemption_timeout_ms; +/* + * Don't evict process queues on vm fault + */ +extern int amdgpu_no_queue_eviction_on_vm_fault; + /* Enable eviction debug messages */ extern bool debug_evictions; -- 2.29.2 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx