Am 04.09.24 um 09:12 schrieb Srinivasan Shanmugam:
This commit replaces the use of amdgpu_job_submit_direct which submits
the job to the ring directly, with drm_sched_entity in the cleaner
shader job submission process. The change allows the GPU scheduler to
manage the cleaner shader job.
- The job is then submitted to the GPU using the
drm_sched_entity_push_job function, which allows the GPU scheduler to
manage the job.
This change improves the reliability of the cleaner shader job
submission process by leveraging the capabilities of the GPU scheduler.
Fixes: f70111466165 ("drm/amdgpu: Add sysfs interface for running cleaner shader")
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@xxxxxxx>
Suggested-by: Christian König <christian.koenig@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 32 ++++++++++++-------------
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
2 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b779d47a546a..149939c4eaed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1397,14 +1397,22 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- long timeout = msecs_to_jiffies(1000);
+ struct drm_gpu_scheduler *sched = &ring->sched;
struct dma_fence *f = NULL;
Please drop that default assignment of f.
struct amdgpu_job *job;
struct amdgpu_ib *ib;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, NULL, NULL,
- 64, AMDGPU_IB_POOL_DIRECT,
+ /* Initialize the scheduler entity */
+ r = drm_sched_entity_init(&adev->gfx.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
+ goto err;
+ }
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->gfx.entity, NULL,
+ 64, 0,
&job);
if (r)
goto err;
@@ -1416,24 +1424,16 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
ib->ptr[i] = ring->funcs->nop;
ib->length_dw = ring->funcs->align_mask + 1;
- r = amdgpu_job_submit_direct(job, ring, &f);
+ f = amdgpu_job_submit(job);
+ r = dma_fence_wait(f, false);
Where is f assigned?
if (r)
- goto err_free;
-
- r = dma_fence_wait_timeout(f, false, timeout);
- if (r == 0)
- r = -ETIMEDOUT;
- else if (r > 0)
- r = 0;
-
- amdgpu_ib_free(adev, ib, f);
+ goto err;
dma_fence_put(f);
+ /* Clean up the scheduler entity */
+ drm_sched_entity_destroy(&adev->gfx.entity);
return 0;
-err_free:
- amdgpu_job_free(job);
- amdgpu_ib_free(adev, ib, f);
err:
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 5644e10a86a9..3c268476dec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -472,6 +472,7 @@ struct amdgpu_gfx {
struct mutex kfd_sch_mutex;
u64 kfd_sch_req_count[MAX_XCP];
bool kfd_sch_inactive[MAX_XCP];
+ struct drm_sched_entity entity;
You can't put that into the amdgpu_gfx structure. The sysfs might be
called concurrently.
I suggest to put it on the stack.
Christian.
};
struct amdgpu_gfx_ras_reg_entry {