Re: [RFC 3/5] drm/scheduler: Add a simple TDR test

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 04/02/2025 16:21, Christian König wrote:
Am 03.02.25 um 16:30 schrieb Tvrtko Ursulin:
Add a very simple TDR test which submits a single job and verifies that
the TDR handling will run if the backend failed to complete the job in
time.

I think I said it before, but I strongly suggest not using TDR as a name in the scheduler at all.

What the scheduler provides is a simple timeout while waiting for the HW fence to signal.

That is fundamentally different from the TDR functionality Windows provides, and we have already had people confusing the two.

I did a s/tdr/timeout/ locally.

Apart from that "yes, please". Those tests are desperately needed.

Cool. Let's see what other people say and whether someone can actually review.

Regards,

Tvrtko

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Danilo Krummrich <dakr@xxxxxxxxxx>
Cc: Matthew Brost <matthew.brost@xxxxxxxxx>
Cc: Philipp Stanner <phasta@xxxxxxxxxx>
---
  .../drm/scheduler/tests/drm_mock_scheduler.c  | 12 +++-
  .../gpu/drm/scheduler/tests/drm_sched_tests.h |  6 +-
  .../scheduler/tests/drm_sched_tests_basic.c   | 64 ++++++++++++++++++-
  3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c
index f1985900a6ba..79b6193ce920 100644
--- a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c
+++ b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c
@@ -160,7 +160,11 @@ static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job)
  static enum drm_gpu_sched_stat
  mock_sched_timedout_job(struct drm_sched_job *sched_job)
  {
-    return DRM_GPU_SCHED_STAT_ENODEV;
+    struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
+
+    job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT;
+
+    return DRM_GPU_SCHED_STAT_NOMINAL;
  }
  static void mock_sched_free_job(struct drm_sched_job *sched_job)
@@ -174,7 +178,9 @@ static const struct drm_sched_backend_ops drm_mock_scheduler_ops = {
      .free_job = mock_sched_free_job
  };
-struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test)
+struct drm_mock_scheduler *
+drm_mock_new_scheduler(struct kunit *test,
+               long timeout)
  {
      struct drm_mock_scheduler *sched;
      int ret;
@@ -188,7 +194,7 @@ struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test)
                   DRM_SCHED_PRIORITY_COUNT,
                   U32_MAX, /* max credits */
                   UINT_MAX, /* hang limit */
-                 MAX_SCHEDULE_TIMEOUT, /* timeout */
+                 timeout,
                   NULL, /* timeout wq */
                   NULL, /* score */
                   "drm-mock-scheduler",
diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h
index 421ee2712985..20695f55e453 100644
--- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h
+++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h
@@ -35,6 +35,9 @@ struct drm_mock_sched_entity {
  struct drm_mock_sched_job {
      struct drm_sched_job    base;
+#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x1
+    unsigned long        flags;
+
      struct list_head    link;
      struct hrtimer        timer;
@@ -65,7 +68,8 @@ drm_sched_job_to_mock_job(struct drm_sched_job *sched_job)
      return container_of(sched_job, struct drm_mock_sched_job, base);
  };
-struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test);
+struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test,
+                          long timeout);
  void drm_mock_scheduler_fini(struct drm_mock_scheduler *sched);
  unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched,
                      unsigned int num);
diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c
index 6fd39bea95b1..eb0d54d00f21 100644
--- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c
+++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c
@@ -3,7 +3,7 @@
  static int drm_sched_basic_init(struct kunit *test)
  {
-    test->priv = drm_mock_new_scheduler(test);
+    test->priv = drm_mock_new_scheduler(test, MAX_SCHEDULE_TIMEOUT);
      return 0;
  }
@@ -15,6 +15,13 @@ static void drm_sched_basic_exit(struct kunit *test)
      drm_mock_scheduler_fini(sched);
  }
+static int drm_sched_tdr_init(struct kunit *test)
+{
+    test->priv = drm_mock_new_scheduler(test, HZ);
+
+    return 0;
+}
+
  static void drm_sched_basic_submit(struct kunit *test)
  {
      struct drm_mock_scheduler *sched = test->priv;
@@ -244,4 +251,57 @@ static struct kunit_suite drm_sched_basic = {
      .test_cases = drm_sched_basic_tests,
  };
-kunit_test_suite(drm_sched_basic);
+static void drm_sched_basic_tdr(struct kunit *test)
+{
+    struct drm_mock_scheduler *sched = test->priv;
+    struct drm_mock_sched_entity *entity;
+    struct drm_mock_sched_job *job;
+    bool done;
+
+    /*
+     * Submit a single job against a scheduler with the timeout configured
+     * and verify that the timeout handling will run if the backend fails
+     * to complete it in time.
+     */
+
+    entity = drm_mock_new_sched_entity(test,
+                       DRM_SCHED_PRIORITY_NORMAL,
+                       sched);
+    job = drm_mock_new_sched_job(test, entity);
+
+    drm_mock_sched_job_submit(job);
+
+    done = drm_mock_sched_job_wait_scheduled(job, HZ);
+    KUNIT_ASSERT_EQ(test, done, true);
+
+    done = drm_mock_sched_job_wait_finished(job, HZ / 2);
+    KUNIT_ASSERT_EQ(test, done, false);
+
+    KUNIT_ASSERT_EQ(test,
+            job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT,
+            0);
+
+    done = drm_mock_sched_job_wait_finished(job, HZ);
+    KUNIT_ASSERT_EQ(test, done, false);
+
+    KUNIT_ASSERT_EQ(test,
+            job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT,
+            DRM_MOCK_SCHED_JOB_TIMEDOUT);
+
+    drm_mock_sched_entity_free(entity);
+}
+
+static struct kunit_case drm_sched_tdr_tests[] = {
+    KUNIT_CASE(drm_sched_basic_tdr),
+    {}
+};
+
+static struct kunit_suite drm_sched_tdr = {
+    .name = "drm_sched_basic_tdr_tests",
+    .init = drm_sched_tdr_init,
+    .exit = drm_sched_basic_exit,
+    .test_cases = drm_sched_tdr_tests,
+};
+
+kunit_test_suites(&drm_sched_basic,
+          &drm_sched_tdr);




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux