Just for my personal edification, do we need __func__ for *_dbg calls? We can
already add the function name when enabling dyndbg with +f, and the messages
are unique here, so there's no real need for an identifier. Just wondering if
my assumptions are incorrect on that (a quick dyndbg sketch follows the
quoted patch below). Thanks!

Kent

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Srinivasan Shanmugam
> Sent: Monday, January 6, 2025 7:44 AM
> To: Koenig, Christian <Christian.Koenig@xxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>
> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; SHANMUGAM, SRINIVASAN <SRINIVASAN.SHANMUGAM@xxxxxxx>
> Subject: [PATCH] drm/amdgpu: Added Debug Logging for Process Isolation
>
> Added debug logging to provide insights into KGD/KFD scheduling, cleaner
> shader emission, and process isolation.
>
> Cc: Christian König <christian.koenig@xxxxxxx>
> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  5 ++++-
>  2 files changed, 34 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 89f7c89d1392..ef985a7bda1a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -1912,16 +1912,19 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
>
>          adev->gfx.kfd_sch_req_count[idx]--;
>
> +        dev_dbg(adev->dev, "%s: Enabling KFD scheduler for idx: %u\n", __func__, idx);
>          if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
>              adev->gfx.kfd_sch_inactive[idx]) {
>              schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
>                          msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
>          }
>      } else {
> +        dev_dbg(adev->dev, "%s: Disabling KFD scheduler for idx: %u\n", __func__, idx);
>          if (adev->gfx.kfd_sch_req_count[idx] == 0) {
>              cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
>              if (!adev->gfx.kfd_sch_inactive[idx]) {
>                  amdgpu_amdkfd_stop_sched(adev, idx);
> +                dev_dbg(adev->dev, "%s: KFD stopped\n", __func__);
>                  adev->gfx.kfd_sch_inactive[idx] = true;
>              }
>          }
> @@ -1960,7 +1963,12 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
>      if (idx >= MAX_XCP)
>          return;
>
> +    dev_dbg(adev->dev, "%s: Starting enforce isolation handler for idx: %u\n",
> +        __func__, idx);
> +
>      mutex_lock(&adev->enforce_isolation_mutex);
> +    dev_dbg(adev->dev, "%s: Processing fences for idx: %u. Checking GFX and Compute rings.\n",
> +        __func__, idx);
>      for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
>          if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
>              fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
> @@ -1969,16 +1977,23 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
>          if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
>              fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
>      }
> +    dev_dbg(adev->dev, "%s: Total fences processed for idx: %u: %u\n",
> +        __func__, idx, fences);
>      if (fences) {
> +        dev_dbg(adev->dev, "%s: Fences detected for idx: %u, scheduling delayed work\n",
> +            __func__, idx);
>          /* we've already had our timeslice, so let's wrap this up */
>          schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
>                      msecs_to_jiffies(1));
>      } else {
> +        dev_dbg(adev->dev, "%s: No fences detected for idx: %u, no delayed work scheduled\n",
> +            __func__, idx);
>          /* Tell KFD to resume the runqueue */
>          if (adev->kfd.init_complete) {
>              WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
>              WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
>              amdgpu_amdkfd_start_sched(adev, idx);
> +            dev_dbg(adev->dev, "%s: KFD started\n", __func__);
>              adev->gfx.kfd_sch_inactive[idx] = false;
>          }
>      }
> @@ -2009,6 +2024,8 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
>          if (!adev->gfx.enforce_isolation_jiffies[idx]) {
>              adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
>              adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
> +            dev_dbg(adev->dev, "%s: Initializing time slice for idx: %u. Setting start time and duration for fair GPU access between KGD and KFD. Initial jiffies: %lu, Time slice: %u ms\n",
> +                __func__, idx, jiffies, GFX_SLICE_PERIOD_MS);
>          }
>          /* Make sure KFD gets a chance to run */
>          if (amdgpu_amdkfd_compute_active(adev, idx)) {
> @@ -2020,19 +2037,28 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
>                  wait = true;
>                  /* reset the timer period */
>                  adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
> +                dev_dbg(adev->dev, "%s: Time slice expired for idx: %u. Allowing KGD to finish its work before KFD resumes. Resetting time slice to %u ms\n",
> +                    __func__, idx, GFX_SLICE_PERIOD_MS);
>              } else {
>                  /* set the timer period to what's left in our time slice */
>                  adev->gfx.enforce_isolation_time[idx] =
>                      GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
> +                dev_dbg(adev->dev, "%s: Adjusting remaining time slice for idx: %u. KFD can use the GPU for %lu more ms\n",
> +                    __func__, idx,
> +                    adev->gfx.enforce_isolation_time[idx]);
>              }
>          } else {
>              /* if jiffies wrap around we will just wait a little longer */
>              adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
> +            dev_dbg(adev->dev, "%s: Jiffies reset for idx: %u. Starting new time slice to ensure fair GPU access between KGD and KFD. New jiffies: %lu\n",
> +                __func__, idx, jiffies);
>          }
>      } else {
>          /* if there is no KFD work, then set the full slice period */
>          adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
>          adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
> +        dev_dbg(adev->dev, "%s: No KFD work for idx: %u. Setting full slice period to %u ms\n",
> +            __func__, idx, GFX_SLICE_PERIOD_MS);
>      }
>  }
>  mutex_unlock(&adev->enforce_isolation_mutex);
> @@ -2055,6 +2081,8 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
>      struct amdgpu_device *adev = ring->adev;
>      u32 idx;
>
> +    dev_dbg(adev->dev, "%s: begin_use\n", __func__);
> +
>      if (!adev->gfx.enable_cleaner_shader)
>          return;
>
> @@ -2089,6 +2117,8 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
>      struct amdgpu_device *adev = ring->adev;
>      u32 idx;
>
> +    dev_dbg(adev->dev, "%s: end_use\n", __func__);
> +
>      if (!adev->gfx.enable_cleaner_shader)
>          return;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c9c48b782ec1..06b527388402 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -688,8 +688,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>
>      if (adev->gfx.enable_cleaner_shader &&
>          ring->funcs->emit_cleaner_shader &&
> -        job->enforce_isolation)
> +        job->enforce_isolation) {
> +        dev_dbg(ring->adev->dev, "%s: Emitting cleaner shader for ring: %s\n",
> +            __func__, ring->name);
>          ring->funcs->emit_cleaner_shader(ring);
> +    }
>
>      if (vm_flush_needed) {
>          trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
> --
> 2.34.1
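
For reference, here is roughly the dyndbg usage I mean: a minimal sketch,
assuming a kernel built with CONFIG_DYNAMIC_DEBUG and debugfs mounted at
/sys/kernel/debug (the file and function names below are just taken from
this patch):

  # Enable every dev_dbg() callsite in amdgpu_gfx.c and prefix each
  # message with the emitting function name (+f); __func__ in the
  # format string then becomes redundant:
  echo 'file amdgpu_gfx.c +pf' > /sys/kernel/debug/dynamic_debug/control

  # Or scope it to a single function:
  echo 'func amdgpu_gfx_enforce_isolation_handler +pf' > /sys/kernel/debug/dynamic_debug/control

The same flags can also be applied at boot or module load via the module
parameter form, e.g. amdgpu.dyndbg="+pf".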