Am 2020-06-12 um 6:03 p.m. schrieb philip yang: > It's a good idea; it would be better to add the same print in the system memory eviction > path, amdgpu_amdkfd_evict_userptr. That's covered by the message in kgd2kfd_quiesce_mm. > > Use WARN_ONCE to avoid duplicate messages. I want duplicate messages. If many different kinds of evictions are happening, I want to see them all. The module parameter is there so I can turn it on/off for short bursts while interesting things are happening. It's off by default. I was considering WARN_RATELIMIT, but that may skip interesting evictions I actually want to see. Regards, Felix > > Regards, > > Philip > > > On 2020-06-11 11:34 p.m., Felix Kuehling wrote: >> Use WARN to print messages with backtrace when evictions are triggered. >> This can help determine the root cause of evictions and help spot driver >> bugs triggering evictions unintentionally, or help with performance >> tuning >> by avoiding conditions that cause evictions in a specific workload. >> >> The messages are controlled by a new module parameter that can be >> changed >> at runtime: >> >> echo Y > /sys/module/amdgpu/parameters/debug_evictions >> echo N > /sys/module/amdgpu/parameters/debug_evictions >> >> Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 ++ >> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ >> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++ >> 5 files changed, 20 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 10ae92e835f6..6c7dd0a707c9 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -186,8 +186,10 @@ extern int amdgpu_noretry; >> extern int amdgpu_force_asic_type; >> #ifdef CONFIG_HSA_AMD >> extern int sched_policy; >> +extern bool debug_evictions; >> #else >> static const int 
sched_policy = KFD_SCHED_POLICY_HWS; >> +static const bool debug_evictions; /* = false */ >> #endif >> extern int amdgpu_tmz; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> index d4d7cca1cc72..fdf350d5e7b7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >> @@ -705,6 +705,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 >> FW supports GWS barriers (false = >> int queue_preemption_timeout_ms = 9000; >> module_param(queue_preemption_timeout_ms, int, 0644); >> MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption >> timeout in ms (1 = Minimum, 9000 = default)"); >> + >> +/** >> + * DOC: debug_evictions(bool) >> + * Enable extra debug messages to help determine the cause of evictions >> + */ >> +bool debug_evictions; >> +module_param(debug_evictions, bool, 0644); >> +MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages >> (false = default)"); >> #endif >> /** >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> index b87ca171986a..072f0e1185a8 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> @@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, >> struct amdgpu_sync *sync, >> continue; >> } >> + WARN(debug_evictions && fence_owner == >> AMDGPU_FENCE_OWNER_KFD, >> + "Adding eviction fence to sync obj"); >> r = amdgpu_sync_fence(sync, f, false); >> if (r) >> break; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> index 22348cebaf36..80393e0583bb 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c >> @@ -942,6 +942,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm) >> if (!p) >> return -ESRCH; >> + WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); >> r = kfd_process_evict_queues(p); >> 
kfd_unref_process(p); >> @@ -1009,6 +1010,8 @@ int >> kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, >> /* During process initialization eviction_work.dwork is >> initialized >> * to kfd_evict_bo_worker >> */ >> + WARN(debug_evictions, "Scheduling eviction of pid %d in %ld >> jiffies", >> + p->lead_thread->pid, delay_jiffies); >> schedule_delayed_work(&p->eviction_work, delay_jiffies); >> out: >> kfd_unref_process(p); >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> index 173d58b2d81f..51ba2020732e 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> @@ -177,6 +177,11 @@ extern bool hws_gws_support; >> */ >> extern int queue_preemption_timeout_ms; >> +/* >> + * Enable eviction debug messages >> + */ >> +extern bool debug_evictions; >> + >> enum cache_policy { >> cache_policy_coherent, >> cache_policy_noncoherent _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx