RE: [PATCH] drm/amdgpu: avoid duplicated tmo report on same job

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Christian, 

What about this one, no timer logic change on scheduler part, only the duplicated tmo report is muted 

/Monk

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Monk Liu
Sent: Friday, May 10, 2019 3:18 PM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Liu, Monk <Monk.Liu@xxxxxxx>
Subject: [PATCH] drm/amdgpu: avoid duplicated tmo report on same job

[CAUTION: External Email]

Signed-off-by: Monk Liu <Monk.Liu@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 12 +++++++++++-
 include/drm/gpu_scheduler.h                |  1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d6286ed..f1dc0ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3356,8 +3356,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
        return true;

 disabled:
-               DRM_INFO("GPU recovery disabled.\n");
-               return false;
+       return false;
 }


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1397942..ca62253 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
        struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
        struct amdgpu_job *job = to_amdgpu_job(s_job);
        struct amdgpu_task_info ti;
+       bool recover;

        memset(&ti, 0, sizeof(struct amdgpu_task_info));

@@ -42,6 +43,11 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
                return;
        }

+       recover = amdgpu_device_should_recover_gpu(ring->adev);
+       if (s_job->sched->last_tmo_id == s_job->id)
+               goto skip_report;
+
+       s_job->sched->last_tmo_id = s_job->id;
        amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
        DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
                  job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
@@ -49,7 +55,11 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
        DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
                  ti.process_name, ti.tgid, ti.task_name, ti.pid);

-       if (amdgpu_device_should_recover_gpu(ring->adev))
+       if (!recover)
+               DRM_INFO("GPU recovery disabled.\n");
+
+skip_report:
+       if (recover)
                amdgpu_device_gpu_recover(ring->adev, job);  }

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9c2a957..1944d27 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -282,6 +282,7 @@ struct drm_gpu_scheduler {
        int                             hang_limit;
        atomic_t                        num_jobs;
        bool                    ready;
+       uint64_t last_tmo_id;
 };

 int drm_sched_init(struct drm_gpu_scheduler *sched,
--
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux