Re: [PATCH v2 2/5] drm/scheduler: Add scheduler unit testing infrastructure and some basic tests

Philipp Stanner <phasta@xxxxxxxxxxx> · Tue, 04 Mar 2025 13:29:02 +0100

On Tue, 2025-03-04 at 10:48 +0000, Tvrtko Ursulin wrote:
> 
> On 04/03/2025 07:20, Philipp Stanner wrote:
> > On Mon, 2025-03-03 at 15:53 +0000, Tvrtko Ursulin wrote:
> > > 
> > > On 03/03/2025 11:38, Philipp Stanner wrote:
> > > > Sry for being late to the party, am a bit busy with defusing my
> > > > own
> > > > dynamite <.<
> > > > 
> > > > On Fri, 2025-02-21 at 12:09 +0000, Tvrtko Ursulin wrote:
> > > > > Implement a mock scheduler backend and add some basic test to
> > > > > exercise the
> > > > > core scheduler code paths.
> > > > > 
> > > > > Mock backend (kind of like a very simple mock GPU) can either
> > > > > process
> > > > > jobs
> > > > > by tests manually advancing the "timeline" job at a time, or
> > > > > alternatively
> > > > > jobs can be configured with a time duration in which case
> > > > > they
> > > > > get
> > > > > completed asynchronously from the unit test code.
> > > > > 
> > > > > Core scheduler classes are subclassed to support this mock
> > > > > implementation.
> > > > > 
> > > > > The tests added are just a few simple submission patterns.
> > > > > 
> > > > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>
> > > > > Suggested-by: Philipp Stanner <phasta@xxxxxxxxxx>
> > > > > Cc: Christian König <christian.koenig@xxxxxxx>
> > > > > Cc: Danilo Krummrich <dakr@xxxxxxxxxx>
> > > > > Cc: Matthew Brost <matthew.brost@xxxxxxxxx>
> > > > > Cc: Philipp Stanner <phasta@xxxxxxxxxx>
> > > > > ---
> > > > >    drivers/gpu/drm/Kconfig.debug                 |  12 +
> > > > >    drivers/gpu/drm/scheduler/.kunitconfig        |  12 +
> > > > >    drivers/gpu/drm/scheduler/Makefile            |   2 +
> > > > >    drivers/gpu/drm/scheduler/tests/Makefile      |   6 +
> > > > >    .../gpu/drm/scheduler/tests/mock_scheduler.c  | 317
> > > > > ++++++++++++++++++
> > > > >    drivers/gpu/drm/scheduler/tests/sched_tests.h | 218
> > > > > ++++++++++++
> > > > >    drivers/gpu/drm/scheduler/tests/tests_basic.c | 197
> > > > > +++++++++++
> > > > >    7 files changed, 764 insertions(+)
> > > > >    create mode 100644 drivers/gpu/drm/scheduler/.kunitconfig
> > > > >    create mode 100644
> > > > > drivers/gpu/drm/scheduler/tests/Makefile
> > > > >    create mode 100644
> > > > > drivers/gpu/drm/scheduler/tests/mock_scheduler.c
> > > > >    create mode 100644
> > > > > drivers/gpu/drm/scheduler/tests/sched_tests.h
> > > > >    create mode 100644
> > > > > drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/Kconfig.debug
> > > > > b/drivers/gpu/drm/Kconfig.debug
> > > > > index 601d7e07d421..6fd4c5669400 100644
> > > > > --- a/drivers/gpu/drm/Kconfig.debug
> > > > > +++ b/drivers/gpu/drm/Kconfig.debug
> > > > > @@ -99,5 +99,17 @@ config DRM_TTM_KUNIT_TEST
> > > > >    
> > > > >    	  If in doubt, say "N".
> > > > >    
> > > > > +config DRM_SCHED_KUNIT_TEST
> > > > > +	tristate "KUnit tests for the DRM scheduler" if
> > > > > !KUNIT_ALL_TESTS
> > > > > +	select DRM_SCHED
> > > > > +	depends on DRM && KUNIT
> > > > > +	default KUNIT_ALL_TESTS
> > > > > +	help
> > > > > +	  Choose this option to build unit tests for the DRM
> > > > > scheduler.
> > > > > +
> > > > > +	  Recommended for driver developers only.
> > > > > +
> > > > > +	  If in doubt, say "N".
> > > > > +
> > > > >    config DRM_EXPORT_FOR_TESTS
> > > > >    	bool
> > > > > diff --git a/drivers/gpu/drm/scheduler/.kunitconfig
> > > > > b/drivers/gpu/drm/scheduler/.kunitconfig
> > > > > new file mode 100644
> > > > > index 000000000000..cece53609fcf
> > > > > --- /dev/null
> > > > > +++ b/drivers/gpu/drm/scheduler/.kunitconfig
> > > > > @@ -0,0 +1,12 @@
> > > > > +CONFIG_KUNIT=y
> > > > > +CONFIG_DRM=y
> > > > > +CONFIG_DRM_SCHED_KUNIT_TEST=y
> > > > > +CONFIG_EXPERT=y
> > > > > +CONFIG_DEBUG_SPINLOCK=y
> > > > > +CONFIG_DEBUG_MUTEXES=y
> > > > > +CONFIG_DEBUG_ATOMIC_SLEEP=y
> > > > > +CONFIG_LOCK_DEBUGGING_SUPPORT=y
> > > > > +CONFIG_PROVE_LOCKING=y
> > > > > +CONFIG_LOCKDEP=y
> > > > > +CONFIG_DEBUG_LOCKDEP=y
> > > > > +CONFIG_DEBUG_LIST=y
> > > > > diff --git a/drivers/gpu/drm/scheduler/Makefile
> > > > > b/drivers/gpu/drm/scheduler/Makefile
> > > > > index 53863621829f..6e13e4c63e9d 100644
> > > > > --- a/drivers/gpu/drm/scheduler/Makefile
> > > > > +++ b/drivers/gpu/drm/scheduler/Makefile
> > > > > @@ -23,3 +23,5 @@
> > > > >    gpu-sched-y := sched_main.o sched_fence.o sched_entity.o
> > > > >    
> > > > >    obj-$(CONFIG_DRM_SCHED) += gpu-sched.o
> > > > > +
> > > > > +obj-$(CONFIG_DRM_SCHED_KUNIT_TEST) += tests/
> > > > > diff --git a/drivers/gpu/drm/scheduler/tests/Makefile
> > > > > b/drivers/gpu/drm/scheduler/tests/Makefile
> > > > > new file mode 100644
> > > > > index 000000000000..51d275a18cf4
> > > > > --- /dev/null
> > > > > +++ b/drivers/gpu/drm/scheduler/tests/Makefile
> > > > > @@ -0,0 +1,6 @@
> > > > > +
> > > > > +drm-sched-tests-y := \
> > > > > +        mock_scheduler.o \
> > > > > +        tests_basic.o
> > > > > +
> > > > > +obj-$(CONFIG_DRM_SCHED_KUNIT_TEST) += drm-sched-tests.o
> > > > > diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
> > > > > b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
> > > > > new file mode 100644
> > > > > index 000000000000..d73a9f0337da
> > > > > --- /dev/null
> > > > > +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
> > > > > @@ -0,0 +1,317 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > +
> > > > > +#include "sched_tests.h"
> > > > > +
> > > > > +/*
> > > > > + * Here we implement the mock "GPU" (or the scheduler
> > > > > backend)
> > > > > which
> > > > > is used by
> > > > > + * the DRM scheduler unit tests in order to exercise the
> > > > > core
> > > > > functionality.
> > > > > + *
> > > > > + * Test cases are implemented in a separate file.
> > > > > + */
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_new_sched_entity - Create a new mock scheduler
> > > > > entity
> > > > > + *
> > > > > + * @test: KUnit test owning the entity
> > > > > + * @priority: Scheduling priority
> > > > > + * @sched: Mock scheduler on which the entity can be
> > > > > scheduled
> > > > > + *
> > > > > + * Returns: New mock scheduler entity with allocation
> > > > > managed by
> > > > > the
> > > > > test
> > > > > + */
> > > > > +struct drm_mock_sched_entity *
> > > > > +drm_mock_new_sched_entity(struct kunit *test,
> > > > 
> > > > This naming seems to be inconsistent with its counterpart,
> > > > drm_mock_sched_entity_free().
> > > > 
> > > > Better drm_mock_sched_entity_new()
> > > 
> > > Yes this is the only incosistent one and it was deliberate
> > > (somehow
> > > it
> > > read better to me when looking at the final code) but if you
> > > insist I
> > > can swap it around. Let me know.
> > 
> > I think it is worth it having that consistent, yes
> > 
> > > 
> > > > 
> > > > 
> > > > > +			  enum drm_sched_priority priority,
> > > > > +			  struct drm_mock_scheduler *sched)
> > > > > +{
> > > > > +	struct drm_mock_sched_entity *entity;
> > > > > +	struct drm_gpu_scheduler *drm_sched;
> > > > > +	int ret;
> > > > > +
> > > > > +	entity = kunit_kzalloc(test, sizeof(*entity),
> > > > > GFP_KERNEL);
> > > > > +	KUNIT_ASSERT_NOT_NULL(test, entity);
> > > > > +
> > > > > +	drm_sched = &sched->base;
> > > > > +	ret = drm_sched_entity_init(&entity->base,
> > > > > +				    priority,
> > > > > +				    &drm_sched, 1,
> > > > > +				    NULL);
> > > > > +	KUNIT_ASSERT_EQ(test, ret, 0);
> > > > > +
> > > > > +	entity->test = test;
> > > > > +
> > > > > +	return entity;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_sched_entity_free - Destroys a mock scheduler
> > > > > entity
> > > > > + *
> > > > > + * @entity: Entity to destroy
> > > > > + *
> > > > > + * To be used from the test cases once done with the entity.
> > > > > + */
> > > > > +void drm_mock_sched_entity_free(struct drm_mock_sched_entity
> > > > > *entity)
> > > > > +{
> > > > > +	drm_sched_entity_destroy(&entity->base);
> > > > > +}
> > > > > +
> > > > > +static enum hrtimer_restart
> > > > > +drm_mock_sched_job_signal_timer(struct hrtimer *hrtimer)
> > > > > +{
> > > > > +	struct drm_mock_sched_job *job =
> > > > > +		container_of(hrtimer, typeof(*job), timer);
> > > > > +	struct drm_mock_scheduler *sched =
> > > > > +		drm_sched_to_mock_sched(job->base.sched);
> > > > > +	struct drm_mock_sched_job *next;
> > > > > +	ktime_t now = ktime_get();
> > > > > +	unsigned long flags;
> > > > > +	LIST_HEAD(signal);
> > > > > +
> > > > > +	spin_lock_irqsave(&sched->lock, flags);
> > > > > +	list_for_each_entry_safe(job, next, &sched-
> > > > > >job_list,
> > > > > link)
> > > > > {
> > > > > +		if (!job->duration_us)
> > > > > +			break;
> > > > > +
> > > > > +		if (ktime_before(now, job->finish_at))
> > > > > +			break;
> > > > > +
> > > > > +		list_move_tail(&job->link, &signal);
> > > > > +		sched->hw_timeline.cur_seqno = job-
> > > > > > hw_fence.seqno;
> > > > > +	}
> > > > > +	spin_unlock_irqrestore(&sched->lock, flags);
> > > > > +
> > > > > +	list_for_each_entry(job, &signal, link) {
> > > > > +		dma_fence_signal(&job->hw_fence);
> > > > > +		dma_fence_put(&job->hw_fence);
> > > > > +	}
> > > > > +
> > > > > +	return HRTIMER_NORESTART;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_new_sched_job - Create a new mock scheduler job
> > > > > + *
> > > > > + * @test: KUnit test owning the job
> > > > > + * @entity: Scheduler entity of the job
> > > > > + *
> > > > > + * Returns: New mock scheduler job with allocation managed
> > > > > by
> > > > > the
> > > > > test
> > > > > + */
> > > > > +struct drm_mock_sched_job *
> > > > > +drm_mock_new_sched_job(struct kunit *test,
> > > > 
> > > > IMO we could even think about establishing this style
> > > > 
> > > > return type stuff *
> > > > function_foo()
> > > > {
> > > > 
> > > > for all scheduler unit test files. Would always be consistent.
> > > > WDYT?
> > > 
> > > I prefer a single line when it fits. I can change it, although in
> > > general I don't think being as strict on this detail is very
> > > important
> > > if that would be a rule applicable just to this sub-directory.
> > > 
> > > > > +		       struct drm_mock_sched_entity *entity)
> > > > > +{
> > > > > +	struct drm_mock_sched_job *job;
> > > > > +	int ret;
> > > > > +
> > > > > +	job = kunit_kzalloc(test, sizeof(*job), GFP_KERNEL);
> > > > > +	KUNIT_ASSERT_NOT_NULL(test, job);
> > > > > +
> > > > > +	ret = drm_sched_job_init(&job->base,
> > > > > +				 &entity->base,
> > > > > +				 1,
> > > > > +				 NULL);
> > > > > +	KUNIT_ASSERT_EQ(test, ret, 0);
> > > > > +
> > > > > +	job->test = test;
> > > > > +
> > > > > +	spin_lock_init(&job->lock);
> > > > > +	INIT_LIST_HEAD(&job->link);
> > > > > +	hrtimer_init(&job->timer, CLOCK_MONOTONIC,
> > > > > HRTIMER_MODE_ABS);
> > > > > +	job->timer.function =
> > > > > drm_mock_sched_job_signal_timer;
> > > > > +
> > > > > +	return job;
> > > > > +}
> > > > > +
> > > > > +static const char
> > > > > *drm_mock_sched_hw_fence_driver_name(struct
> > > > > dma_fence *fence)
> > > > 
> > > > here for example; see comment above.
> > > > 
> > > > > +{
> > > > > +	return "drm_mock_sched";
> > > > > +}
> > > > > +
> > > > > +static const char *
> > > > > +drm_mock_sched_hw_fence_timeline_name(struct dma_fence
> > > > > *fence)
> > > > > +{
> > > > > +	struct drm_mock_sched_job *job =
> > > > > +		container_of(fence, typeof(*job), hw_fence);
> > > > > +
> > > > > +	return (const char *)job->base.sched->name;
> > > > > +}
> > > > > +
> > > > > +static void drm_mock_sched_hw_fence_release(struct dma_fence
> > > > > *fence)
> > > > > +{
> > > > > +	struct drm_mock_sched_job *job =
> > > > > +		container_of(fence, typeof(*job), hw_fence);
> > > > > +
> > > > > +	hrtimer_cancel(&job->timer);
> > > > > +
> > > > > +	/* Freed by the kunit framework */
> > > > > +}
> > > > > +
> > > > > +static const struct dma_fence_ops
> > > > > drm_mock_sched_hw_fence_ops =
> > > > > {
> > > > > +	.get_driver_name =
> > > > > drm_mock_sched_hw_fence_driver_name,
> > > > > +	.get_timeline_name =
> > > > > drm_mock_sched_hw_fence_timeline_name,
> > > > > +	.release = drm_mock_sched_hw_fence_release,
> > > > > +};
> > > > > +
> > > > > +static struct dma_fence *mock_sched_run_job(struct
> > > > > drm_sched_job
> > > > > *sched_job)
> > > > > +{
> > > > > +	struct drm_mock_scheduler *sched =
> > > > > +		drm_sched_to_mock_sched(sched_job->sched);
> > > > > +	struct drm_mock_sched_job *job =
> > > > > drm_sched_job_to_mock_job(sched_job);
> > > > > +
> > > > > +	dma_fence_init(&job->hw_fence,
> > > > > +		       &drm_mock_sched_hw_fence_ops,
> > > > > +		       &job->lock,
> > > > > +		       sched->hw_timeline.context,
> > > > > +		       atomic_inc_return(&sched-
> > > > > > hw_timeline.next_seqno));
> > > > > +
> > > > > +	dma_fence_get(&job->hw_fence); /* Reference for the
> > > > > job_list
> > > > > */
> > > > > +
> > > > > +	spin_lock_irq(&sched->lock);
> > > > > +	if (job->duration_us) {
> > > > > +		ktime_t prev_finish_at = 0;
> > > > > +
> > > > > +		if (!list_empty(&sched->job_list)) {
> > > > > +			struct drm_mock_sched_job *prev =
> > > > > +				list_last_entry(&sched-
> > > > > > job_list,
> > > > > typeof(*prev),
> > > > > +						link);
> > > > > +
> > > > > +			prev_finish_at = prev->finish_at;
> > > > > +		}
> > > > > +
> > > > > +		if (!prev_finish_at)
> > > > > +			prev_finish_at = ktime_get();
> > > > > +
> > > > > +		job->finish_at =
> > > > > ktime_add_us(prev_finish_at,
> > > > > job-
> > > > > > duration_us);
> > > > > +	}
> > > > > +	list_add_tail(&job->link, &sched->job_list);
> > > > > +	if (job->finish_at)
> > > > > +		hrtimer_start(&job->timer, job->finish_at,
> > > > > HRTIMER_MODE_ABS);
> > > > > +	spin_unlock_irq(&sched->lock);
> > > > > +
> > > > > +	return &job->hw_fence;
> > > > > +}
> > > > > +
> > > > > +static enum drm_gpu_sched_stat
> > > > > +mock_sched_timedout_job(struct drm_sched_job *sched_job)
> > > > > +{
> > > > > +	return DRM_GPU_SCHED_STAT_ENODEV;
> > > > > +}
> > > > > +
> > > > > +static void mock_sched_free_job(struct drm_sched_job
> > > > > *sched_job)
> > > > > +{
> > > > > +	drm_sched_job_cleanup(sched_job);
> > > > > +}
> > > > > +
> > > > > +static const struct drm_sched_backend_ops
> > > > > drm_mock_scheduler_ops
> > > > > = {
> > > > > +	.run_job = mock_sched_run_job,
> > > > > +	.timedout_job = mock_sched_timedout_job,
> > > > > +	.free_job = mock_sched_free_job
> > > > > +};
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_new_scheduler - Create a new mock scheduler
> > > > > + *
> > > > > + * @test: KUnit test owning the job
> > > > > + *
> > > > > + * Returns: New mock scheduler with allocation managed by
> > > > > the
> > > > > test
> > > > > + */
> > > > > +struct drm_mock_scheduler *drm_mock_new_scheduler(struct
> > > > > kunit
> > > > > *test)
> > > > > +{
> > > > > +	struct drm_sched_init_args args = {
> > > > > +		.ops		= &drm_mock_scheduler_ops,
> > > > > +		.num_rqs	= DRM_SCHED_PRIORITY_COUNT,
> > > > > +		.credit_limit	= U32_MAX,
> > > > > +		.hang_limit	= UINT_MAX,
> > 
> > Another question – I think we are in the process of deprecating the
> > hang limit since submitting the same broken job again and again and
> > expecting it suddenly to work is the classic definition of madness.
> > 
> > My feeling would be that it should be 1, since that is what we
> > expect
> > drivers to do.
> > 
> > Or is there a specific reason why you set it to MAX anyways?
> 
> No special reason and it doesn't matter really. It will only be 
> interesting once we add more detailed tests for timeout/ban handling,
> at 
> which point those tests will explicitly confiture what limit they
> want. 
> As such, for now, this only needs to be non-zero.

OK, then I'd say please set it to 1. This way we cannot forget later
and the scheduler will always behave as we want it to for all drivers.

I assume it will be a while until we really can remove the hang_limit
from our code base, so…

> 
> For the series as a whole, I've done the other small tweaks and am
> ready 
> to send out v3.

Cool!

P.

> 
> Regards,
> 
> Tvrtko
> 
> > 
> > 
> > > > > +		.timeout	= MAX_SCHEDULE_TIMEOUT,
> > > > > +		.name		= "drm-mock-scheduler",
> > > > > +	};
> > > > > +	struct drm_mock_scheduler *sched;
> > > > > +	int ret;
> > > > > +
> > > > > +	sched = kunit_kzalloc(test, sizeof(*sched),
> > > > > GFP_KERNEL);
> > > > > +	KUNIT_ASSERT_NOT_NULL(test, sched);
> > > > > +
> > > > > +	ret = drm_sched_init(&sched->base, &args);
> > > > > +	KUNIT_ASSERT_EQ(test, ret, 0);
> > > > > +
> > > > > +	sched->test = test;
> > > > > +	sched->hw_timeline.context =
> > > > > dma_fence_context_alloc(1);
> > > > > +	atomic_set(&sched->hw_timeline.next_seqno, 0);
> > > > > +	INIT_LIST_HEAD(&sched->job_list);
> > > > > +	spin_lock_init(&sched->lock);
> > > > > +
> > > > > +	return sched;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_scheduler_fini - Destroys a mock scheduler
> > > > > + *
> > > > > + * @sched: Scheduler to destroy
> > > > > + *
> > > > > + * To be used from the test cases once done with the
> > > > > scheduler.
> > > > > + */
> > > > > +void drm_mock_scheduler_fini(struct drm_mock_scheduler
> > > > > *sched)
> > > > > +{
> > > > > +	struct drm_mock_sched_job *job, *next;
> > > > > +	unsigned long flags;
> > > > > +	LIST_HEAD(signal);
> > > > > +
> > > > > +	spin_lock_irqsave(&sched->lock, flags);
> > > > > +	list_for_each_entry_safe(job, next, &sched-
> > > > > >job_list,
> > > > > link)
> > > > > +		list_move_tail(&job->link, &signal);
> > > > > +	spin_unlock_irqrestore(&sched->lock, flags);
> > > > > +
> > > > > +	list_for_each_entry(job, &signal, link) {
> > > > > +		hrtimer_cancel(&job->timer);
> > > > > +		dma_fence_put(&job->hw_fence);
> > > > > +	}
> > > > > +
> > > > > +	drm_sched_fini(&sched->base);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_sched_advance - Advances the mock scheduler
> > > > > timeline
> > > > > + *
> > > > > + * @sched: Scheduler timeline to advance
> > > > > + * @num: By how many jobs to advance
> > > > > + *
> > > > > + * Advancing the scheduler timeline by a number of seqnos
> > > > > will
> > > > > trigger
> > > > > + * signalling of the hardware fences and unlinking the jobs
> > > > > from
> > > > > the
> > > > > internal
> > > > > + * scheduler tracking.
> > > > > + *
> > > > > + * This can be used from test cases which want complete
> > > > > control
> > > > > of
> > > > > the simulated
> > > > > + * job execution timing. For example submitting one job with
> > > > > no
> > > > > set
> > > > > duration
> > > > > + * would never complete it before test cases advances the
> > > > > timeline
> > > > > by one.
> > > > > + */
> > > > > +unsigned int drm_mock_sched_advance(struct
> > > > > drm_mock_scheduler
> > > > > *sched,
> > > > > +				    unsigned int num)
> > > > > +{
> > > > > +	struct drm_mock_sched_job *job, *next;
> > > > > +	unsigned int found = 0;
> > > > > +	unsigned long flags;
> > > > > +	LIST_HEAD(signal);
> > > > > +
> > > > > +	spin_lock_irqsave(&sched->lock, flags);
> > > > > +	if (WARN_ON_ONCE(sched->hw_timeline.cur_seqno + num
> > > > > <
> > > > > +			 sched->hw_timeline.cur_seqno))
> > > > > +		goto unlock;
> > > > > +	sched->hw_timeline.cur_seqno += num;
> > > > > +	list_for_each_entry_safe(job, next, &sched-
> > > > > >job_list,
> > > > > link)
> > > > > {
> > > > > +		if (sched->hw_timeline.cur_seqno < job-
> > > > > > hw_fence.seqno)
> > > > > +			break;
> > > > > +
> > > > > +		list_move_tail(&job->link, &signal);
> > > > > +		found++;
> > > > > +	}
> > > > > +unlock:
> > > > > +	spin_unlock_irqrestore(&sched->lock, flags);
> > > > > +
> > > > > +	list_for_each_entry(job, &signal, link) {
> > > > > +		dma_fence_signal(&job->hw_fence);
> > > > > +		dma_fence_put(&job->hw_fence);
> > > > > +	}
> > > > > +
> > > > > +	return found;
> > > > > +}
> > > > > +
> > > > > +MODULE_DESCRIPTION("DRM mock scheduler and tests");
> > > > > +MODULE_LICENSE("GPL");
> > > > > diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h
> > > > > b/drivers/gpu/drm/scheduler/tests/sched_tests.h
> > > > > new file mode 100644
> > > > > index 000000000000..0614bc901dd1
> > > > > --- /dev/null
> > > > > +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h
> > > > > @@ -0,0 +1,218 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +
> > > > > +#ifndef _SCHED_TESTS_H_
> > > > > +#define _SCHED_TESTS_H_
> > > > > +
> > > > > +#include <kunit/test.h>
> > > > > +#include <linux/atomic.h>
> > > > > +#include <linux/dma-fence.h>
> > > > > +#include <linux/hrtimer.h>
> > > > > +#include <linux/ktime.h>
> > > > > +#include <linux/list.h>
> > > > > +#include <linux/atomic.h>
> > > > > +#include <linux/mutex.h>
> > > > > +#include <linux/types.h>
> > > > > +
> > > > > +#include <drm/gpu_scheduler.h>
> > > > > +
> > > > > +/*
> > > > > + * DOC: Mock DRM scheduler data structures
> > > > > + *
> > > > > + * drm_mock_* data structures are used to implement a mock
> > > > > "GPU".
> > > > > + *
> > > > > + * They subclass the core DRM scheduler objects and add
> > > > > their
> > > > > data
> > > > > on top, which
> > > > > + * enables tracking the submitted jobs and simulating their
> > > > > execution with the
> > > > > + * attributes as specified by the test case.
> > > > > + */
> > > > > +
> > > > > +/**
> > > > > + * struct drm_mock_scheduler - implements a trivial mock GPU
> > > > > execution engine
> > > > > + *
> > > > > + * @base: DRM scheduler base class
> > > > > + * @test: Backpointer to owning the kunit test case
> > > > > + * @lock: Lock to protect the simulated @hw_timeline and the
> > > > > @job_list
> > > > > + * @job_list: List of jobs submitted to the mock GPU
> > > > > + * @hw_timeline: Simulated hardware timeline has a @context,
> > > > > @next_seqno and
> > > > > + *		 @cur_seqno for implementing a struct
> > > > > dma_fence
> > > > > signaling the
> > > > > + *		 simulated job completion.
> > > > > + *
> > > > > + * Trivial mock GPU execution engine tracks submitted jobs
> > > > > and
> > > > > enables
> > > > > + * completing them strictly in submission order.
> > > > > + */
> > > > > +struct drm_mock_scheduler {
> > > > > +	struct drm_gpu_scheduler base;
> > > > > +
> > > > > +	struct kunit		*test;
> > > > > +
> > > > > +	spinlock_t		lock;
> > > > > +	struct list_head	job_list;
> > > > > +
> > > > > +	struct {
> > > > > +		u64		context;
> > > > > +		atomic_t	next_seqno;
> > > > > +		unsigned int	cur_seqno;
> > > > > +	} hw_timeline;
> > > > > +};
> > > > > +
> > > > > +/**
> > > > > + * struct drm_mock_sched_entity - implements a mock GPU
> > > > > sched
> > > > > entity
> > > > > + *
> > > > > + * @base: DRM scheduler entity base class
> > > > > + * @test: Backpointer to owning the kunit test case
> > > > > + *
> > > > > + * Mock GPU sched entity is used by the test cases to submit
> > > > > jobs to
> > > > > the mock
> > > > > + * scheduler.
> > > > > + */
> > > > > +struct drm_mock_sched_entity {
> > > > > +	struct drm_sched_entity base;
> > > > > +
> > > > > +	struct kunit		*test;
> > > > > +};
> > > > > +
> > > > > +/**
> > > > > + * struct drm_mock_sched_job - implements a mock GPU job
> > > > > + *
> > > > > + * @base: DRM sched job base class
> > > > > + * @link: List head element used by job tracking by the
> > > > > drm_mock_scheduler
> > > > > + * @timer: Timer used for simulating job execution duration
> > > > > + * @duration_us: Simulated job duration in micro seconds, or
> > > > > zero if
> > > > > in manual
> > > > > + *		 timeline advance mode
> > > > > + * @finish_at: Absolute time when the jobs with set duration
> > > > > will
> > > > > complete
> > > > > + * @lock: Lock used for @hw_fence
> > > > > + * @hw_fence: Fence returned to DRM scheduler as the
> > > > > hardware
> > > > > fence
> > > > 
> > > > Can you ellaborate a bit more about lock's purpose (here in the
> > > > thread
> > > > at first)?
> > > > 
> > > > The hw_fence has its own internal lock for adding callbacks and
> > > > the
> > > > like, so why is this one here necessary?
> > > 
> > > It _is_ the internal fence lock, the one that is passed to
> > > dma_fence_init().
> > 
> > Ah, right
> > 
> > > 
> > > > Furthermore, at least in this patch here, it seems that
> > > > whenever
> > > > hw_fence is changed, its being protected by the scheduler's
> > > > lock.
> > > > Why
> > > > is that?
> > > 
> > > I don't follow - what do you consider "hw_fence is changed"?
> > > 
> > > Hw_fence is initialized in the ->run_job() callback and is
> > > signalled
> > > outside the mock_sched->lock.
> > 
> > I meant when it's used. Forget about it, makes sense now.
> > 
> > > 
> > > > 
> > > > > + * @test: Backpointer to owning the kunit test case
> > > > > + *
> > > > > + * Mock GPU sched job is used by the test cases to submit
> > > > > jobs
> > > > > to
> > > > > the mock
> > > > > + * scheduler.
> > > > > + */
> > > > > +struct drm_mock_sched_job {
> > > > > +	struct drm_sched_job	base;
> > > > > +
> > > > > +	struct list_head	link;
> > > > > +	struct hrtimer		timer;
> > > > > +
> > > > > +	unsigned int		duration_us;
> > > > > +	ktime_t			finish_at;
> > > > > +
> > > > > +	spinlock_t		lock;
> > > > > +	struct dma_fence	hw_fence;
> > > > > +
> > > > > +	struct kunit		*test;
> > > > > +};
> > > > > +
> > > > > +static inline struct drm_mock_scheduler *
> > > > > +drm_sched_to_mock_sched(struct drm_gpu_scheduler *sched)
> > > > > +{
> > > > > +	return container_of(sched, struct
> > > > > drm_mock_scheduler,
> > > > > base);
> > > > > +};
> > > > > +
> > > > > +static inline struct drm_mock_sched_entity *
> > > > > +drm_sched_entity_to_mock_entity(struct drm_sched_entity
> > > > > *sched_entity)
> > > > > +{
> > > > > +	return container_of(sched_entity, struct
> > > > > drm_mock_sched_entity, base);
> > > > > +};
> > > > > +
> > > > > +static inline struct drm_mock_sched_job *
> > > > > +drm_sched_job_to_mock_job(struct drm_sched_job *sched_job)
> > > > > +{
> > > > > +	return container_of(sched_job, struct
> > > > > drm_mock_sched_job,
> > > > > base);
> > > > > +};
> > > > > +
> > > > > +struct drm_mock_scheduler *drm_mock_new_scheduler(struct
> > > > > kunit
> > > > > *test);
> > > > > +void drm_mock_scheduler_fini(struct drm_mock_scheduler
> > > > > *sched);
> > > > > +unsigned int drm_mock_sched_advance(struct
> > > > > drm_mock_scheduler
> > > > > *sched,
> > > > > +				    unsigned int num);
> > > > > +
> > > > > +struct drm_mock_sched_entity *
> > > > > +drm_mock_new_sched_entity(struct kunit *test,
> > > > > +			  enum drm_sched_priority priority,
> > > > > +			  struct drm_mock_scheduler *sched);
> > > > > +void drm_mock_sched_entity_free(struct drm_mock_sched_entity
> > > > > *entity);
> > > > > +
> > > > > +struct drm_mock_sched_job *
> > > > > +drm_mock_new_sched_job(struct kunit *test,
> > > > > +		       struct drm_mock_sched_entity
> > > > > *entity);
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_sched_job_submit - Arm and submit a job in one
> > > > > go
> > > > > + *
> > > > > + * @job: Job to arm and submit
> > > > > + */
> > > > > +static inline void drm_mock_sched_job_submit(struct
> > > > > drm_mock_sched_job *job)
> > > > > +{
> > > > > +	drm_sched_job_arm(&job->base);
> > > > > +	drm_sched_entity_push_job(&job->base);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_sched_job_set_duration_us - Set a job duration
> > > > > + *
> > > > > + * @job: Job to set the duration for
> > > > > + * @duration_us: Duration in micro seconds
> > > > > + *
> > > > > + * Jobs with duration set will be automatically completed by
> > > > > the
> > > > > mock scheduler
> > > > > + * as the timeline progress, unless a job without a set
> > > > > duration
> > > > > is
> > > > 
> > > > s/progress/progresses
> > > > 
> > > > > encountered
> > > > > + * in the timelime in which case calling
> > > > > drm_mock_sched_advance
> > > > > will
> > > > 
> > > > s/drm_mock_sched_advance/drm_mock_sched_advanace()
> > > > 
> > > > As I learned recently, the brackets are what actually teaches
> > > > the
> > > > doc
> > > > generator that this is a function that shall be linked in the
> > > > docu.
> > > 
> > > Both done.
> > > 
> > > > 
> > > > > be required
> > > > > + * to bump the timeline.
> > > > > + */
> > > > > +static inline void
> > > > > +drm_mock_sched_job_set_duration_us(struct drm_mock_sched_job
> > > > > *job,
> > > > > +				   unsigned int duration_us)
> > > > > +{
> > > > > +	job->duration_us = duration_us;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_sched_job_is_finished - Check if a job is
> > > > > finished
> > > > > + *
> > > > > + * @job: Job to check
> > > > > + *
> > > > > + * Returns: true if finished
> > > > > + */
> > > > > +static inline bool
> > > > > +drm_mock_sched_job_is_finished(struct drm_mock_sched_job
> > > > > *job)
> > > > > +{
> > > > > +	return dma_fence_is_signaled(&job->base.s_fence-
> > > > > > finished);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_sched_job_wait_finished - Wait until a job is
> > > > > finished
> > > > > + *
> > > > > + * @job: Job to wait for
> > > > > + * @timeout: Wait time in jiffies
> > > > > + *
> > > > > + * Returns: true if finished within the timeout provided,
> > > > > otherwise
> > > > > false
> > > > > + */
> > > > > +static inline bool
> > > > > +drm_mock_sched_job_wait_finished(struct drm_mock_sched_job
> > > > > *job,
> > > > > long timeout)
> > > > > +{
> > > > > +	long ret;
> > > > > +
> > > > > +	ret = dma_fence_wait_timeout(&job->base.s_fence-
> > > > > > finished,
> > > > > +				     false,
> > > > > +				     timeout);
> > > > > +
> > > > > +	return ret != 0;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * drm_mock_sched_job_wait_finished - Wait until a job is
> > > > > scheduled
> > > > > + *
> > > > > + * @job: Job to wait for
> > > > > + * @timeout: Wait time in jiffies
> > > > > + *
> > > > > + * Returns: true if scheduled within the timeout provided,
> > > > > otherwise
> > > > > false
> > > > > + */
> > > > > +static inline long
> > > > > +drm_mock_sched_job_wait_scheduled(struct drm_mock_sched_job
> > > > > *job,
> > > > > long timeout)
> > > > > +{
> > > > > +	long ret;
> > > > > +
> > > > > +	ret = dma_fence_wait_timeout(&job->base.s_fence-
> > > > > > scheduled,
> > > > > +				     false,
> > > > > +				     timeout);
> > > > > +
> > > > > +	return ret != 0;
> > > > > +}
> > > > > +
> > > > > +#endif
> > > > > diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > > > b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > > > new file mode 100644
> > > > > index 000000000000..c12368a22a39
> > > > > --- /dev/null
> > > > > +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > > > @@ -0,0 +1,197 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > +
> > > > > +#include "sched_tests.h"
> > > > > +
> > > > > +/*
> > > > > + * DRM scheduler basic tests should check the basic
> > > > > functional
> > > > > correctness of
> > > > > + * the scheduler, including some very light smoke testing.
> > > > > More
> > > > > targeted tests,
> > > > > + * for example focusing on testing specific bugs and other
> > > > > more
> > > > > complicated test
> > > > > + * scenarios, should be implemented in a separate source
> > > > > units.
> > > > 
> > > > s/a//
> > > 
> > > Done.
> > > 
> > > > 
> > > > > + */
> > > > > +
> > > > > +static int drm_sched_basic_init(struct kunit *test)
> > > > > +{
> > > > > +	test->priv = drm_mock_new_scheduler(test);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static void drm_sched_basic_exit(struct kunit *test)
> > > > > +{
> > > > > +	struct drm_mock_scheduler *sched = test->priv;
> > > > > +
> > > > > +	drm_mock_scheduler_fini(sched);
> > > > > +}
> > > > > +
> > > > > +static void drm_sched_basic_submit(struct kunit *test)
> > > > > +{
> > > > > +	struct drm_mock_scheduler *sched = test->priv;
> > > > > +	struct drm_mock_sched_entity *entity;
> > > > > +	struct drm_mock_sched_job *job;
> > > > > +	unsigned int i;
> > > > > +	bool done;
> > > > > +
> > > > > +	/*
> > > > > +	 * Submit one job to the scheduler and verify that
> > > > > it
> > > > > gets
> > > > > scheduled
> > > > > +	 * and completed only when the mock hw backend
> > > > > processes
> > > > > it.
> > > > > +	 */
> > > > 
> > > > That seems to be the description of the entire function? If
> > > > yes, we
> > > > probably want it above the function's begin.
> > > 
> > > I find it more useful closer to the action so would prefer to
> > > keep it
> > > as
> > > is. But if you insist I can move.
> > 
> > It's not a dealbreaker
> > 
> > 
> > Greetings,
> > P.
> > 
> > 
> > > 
> > > Regards,
> > > 
> > > Tvrtko
> > > 
> > > > 
> > > > 
> > > > Thx
> > > > P.
> > > > 
> > > > > +
> > > > > +	entity = drm_mock_new_sched_entity(test,
> > > > > +					
> > > > > DRM_SCHED_PRIORITY_NORMAL,
> > > > > +					   sched);
> > > > > +	job = drm_mock_new_sched_job(test, entity);
> > > > > +
> > > > > +	drm_mock_sched_job_submit(job);
> > > > > +
> > > > > +	done = drm_mock_sched_job_wait_scheduled(job, HZ);
> > > > > +	KUNIT_ASSERT_EQ(test, done, true);
> > > > > +
> > > > > +	done = drm_mock_sched_job_wait_finished(job, HZ /
> > > > > 2);
> > > > > +	KUNIT_ASSERT_EQ(test, done, false);
> > > > > +
> > > > > +	i = drm_mock_sched_advance(sched, 1);
> > > > > +	KUNIT_ASSERT_EQ(test, i, 1);
> > > > > +
> > > > > +	done = drm_mock_sched_job_wait_finished(job, HZ);
> > > > > +	KUNIT_ASSERT_EQ(test, done, true);
> > > > > +
> > > > > +	drm_mock_sched_entity_free(entity);
> > > > > +}
> > > > > +
> > > > > +struct drm_sched_basic_params {
> > > > > +	const char *description;
> > > > > +	unsigned int queue_depth;
> > > > > +	unsigned int num_entities;
> > > > > +	unsigned int job_us;
> > > > > +	bool dep_chain;
> > > > > +};
> > > > > +
> > > > > +static const struct drm_sched_basic_params
> > > > > drm_sched_basic_cases[] =
> > > > > {
> > > > > +	{
> > > > > +		.description = "A queue of jobs in a single
> > > > > entity",
> > > > > +		.queue_depth = 100,
> > > > > +		.job_us = 1000,
> > > > > +		.num_entities = 1,
> > > > > +	},
> > > > > +	{
> > > > > +		.description = "A chain of dependent jobs
> > > > > across
> > > > > multiple entities",
> > > > > +		.queue_depth = 100,
> > > > > +		.job_us = 1000,
> > > > > +		.num_entities = 1,
> > > > > +		.dep_chain = true,
> > > > > +	},
> > > > > +	{
> > > > > +		.description = "Multiple independent job
> > > > > queues",
> > > > > +		.queue_depth = 100,
> > > > > +		.job_us = 1000,
> > > > > +		.num_entities = 4,
> > > > > +	},
> > > > > +	{
> > > > > +		.description = "Multiple inter-dependent job
> > > > > queues",
> > > > > +		.queue_depth = 100,
> > > > > +		.job_us = 1000,
> > > > > +		.num_entities = 4,
> > > > > +		.dep_chain = true,
> > > > > +	},
> > > > > +};
> > > > > +
> > > > > +static void
> > > > > +drm_sched_basic_desc(const struct drm_sched_basic_params
> > > > > *params,
> > > > > char *desc)
> > > > > +{
> > > > > +	strscpy(desc, params->description,
> > > > > KUNIT_PARAM_DESC_SIZE);
> > > > > +}
> > > > > +
> > > > > +KUNIT_ARRAY_PARAM(drm_sched_basic, drm_sched_basic_cases,
> > > > > drm_sched_basic_desc);
> > > > > +
> > > > > +static void drm_sched_basic_test(struct kunit *test)
> > > > > +{
> > > > > +	const struct drm_sched_basic_params *params = test-
> > > > > > param_value;
> > > > > +	struct drm_mock_scheduler *sched = test->priv;
> > > > > +	struct drm_mock_sched_job *job, *prev = NULL;
> > > > > +	struct drm_mock_sched_entity **entity;
> > > > > +	unsigned int i, cur_ent = 0;
> > > > > +	bool done;
> > > > > +
> > > > > +	entity = kunit_kcalloc(test, params->num_entities,
> > > > > sizeof(*entity),
> > > > > +			       GFP_KERNEL);
> > > > > +	KUNIT_ASSERT_NOT_NULL(test, entity);
> > > > > +
> > > > > +	for (i = 0; i < params->num_entities; i++)
> > > > > +		entity[i] = drm_mock_new_sched_entity(test,
> > > > > +						
> > > > > DRM_SCHED_PRIORITY_NORMAL,
> > > > > +						     
> > > > > sched);
> > > > > +
> > > > > +	for (i = 0; i < params->queue_depth; i++) {
> > > > > +		job = drm_mock_new_sched_job(test,
> > > > > entity[cur_ent++]);
> > > > > +		cur_ent %= params->num_entities;
> > > > > +		drm_mock_sched_job_set_duration_us(job,
> > > > > params-
> > > > > > job_us);
> > > > > +		if (params->dep_chain && prev)
> > > > > +			drm_sched_job_add_dependency(&job-
> > > > > >base,
> > > > > +						
> > > > > dma_fence_get(&prev->base.s_fence->finished));
> > > > > +		drm_mock_sched_job_submit(job);
> > > > > +		prev = job;
> > > > > +	}
> > > > > +
> > > > > +	done = drm_mock_sched_job_wait_finished(job, HZ);
> > > > > +	KUNIT_ASSERT_EQ(test, done, true);
> > > > > +
> > > > > +	for (i = 0; i < params->num_entities; i++)
> > > > > +		drm_mock_sched_entity_free(entity[i]);
> > > > > +}
> > > > > +
> > > > > +static void drm_sched_basic_entity_cleanup(struct kunit
> > > > > *test)
> > > > > +{
> > > > > +	struct drm_mock_sched_job *job, *mid, *prev = NULL;
> > > > > +	struct drm_mock_scheduler *sched = test->priv;
> > > > > +	struct drm_mock_sched_entity *entity[4];
> > > > > +	const unsigned int qd = 100;
> > > > > +	unsigned int i, cur_ent = 0;
> > > > > +	bool done;
> > > > > +
> > > > > +	/*
> > > > > +	 * Submit a queue of jobs across different entities
> > > > > with
> > > > > an
> > > > > explicit
> > > > > +	 * chain of dependencies between them and trigger
> > > > > entity
> > > > > cleanup while
> > > > > +	 * the queue is still being processed.
> > > > > +	 */
> > > > > +
> > > > > +	for (i = 0; i < ARRAY_SIZE(entity); i++)
> > > > > +		entity[i] = drm_mock_new_sched_entity(test,
> > > > > +						
> > > > > DRM_SCHED_PRIORITY_NORMAL,
> > > > > +						     
> > > > > sched);
> > > > > +
> > > > > +	for (i = 0; i < qd; i++) {
> > > > > +		job = drm_mock_new_sched_job(test,
> > > > > entity[cur_ent++]);
> > > > > +		cur_ent %= ARRAY_SIZE(entity);
> > > > > +		drm_mock_sched_job_set_duration_us(job,
> > > > > 1000);
> > > > > +		if (prev)
> > > > > +			drm_sched_job_add_dependency(&job-
> > > > > >base,
> > > > > +						
> > > > > dma_fence_get(&prev->base.s_fence->finished));
> > > > > +		drm_mock_sched_job_submit(job);
> > > > > +		if (i == qd / 2)
> > > > > +			mid = job;
> > > > > +		prev = job;
> > > > > +	}
> > > > > +
> > > > > +	done = drm_mock_sched_job_wait_finished(mid, HZ);
> > > > > +	KUNIT_ASSERT_EQ(test, done, true);
> > > > > +
> > > > > +	/* Exit with half of the queue still pending to be
> > > > > executed.
> > > > > */
> > > > > +	for (i = 0; i < ARRAY_SIZE(entity); i++)
> > > > > +		drm_mock_sched_entity_free(entity[i]);
> > > > > +}
> > > > > +
> > > > > +static struct kunit_case drm_sched_basic_tests[] = {
> > > > > +	KUNIT_CASE(drm_sched_basic_submit),
> > > > > +	KUNIT_CASE_PARAM(drm_sched_basic_test,
> > > > > drm_sched_basic_gen_params),
> > > > > +	KUNIT_CASE(drm_sched_basic_entity_cleanup),
> > > > > +	{}
> > > > > +};
> > > > > +
> > > > > +static struct kunit_suite drm_sched_basic = {
> > > > > +	.name = "drm_sched_basic_tests",
> > > > > +	.init = drm_sched_basic_init,
> > > > > +	.exit = drm_sched_basic_exit,
> > > > > +	.test_cases = drm_sched_basic_tests,
> > > > > +};
> > > > > +
> > > > > +kunit_test_suite(drm_sched_basic);
> > > > 
> > > 
> > 
>