On Thu, May 02, 2024 at 08:38:12PM +0200, Boris Brezillon wrote: > We need to undo what was done in panthor_sched_pre_reset() even if the > reset failed. We just flag all previously running groups as terminated > when that happens to unblock things. > > Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx> Reviewed-by: Liviu Dudau <liviu.dudau@xxxxxxx> > --- > drivers/gpu/drm/panthor/panthor_device.c | 7 +------ > drivers/gpu/drm/panthor/panthor_sched.c | 19 ++++++++++++++----- > drivers/gpu/drm/panthor/panthor_sched.h | 2 +- > 3 files changed, 16 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c > index 4c5b54e7abb7..4082c8f2951d 100644 > --- a/drivers/gpu/drm/panthor/panthor_device.c > +++ b/drivers/gpu/drm/panthor/panthor_device.c > @@ -129,13 +129,8 @@ static void panthor_device_reset_work(struct work_struct *work) > panthor_gpu_l2_power_on(ptdev); > panthor_mmu_post_reset(ptdev); > ret = panthor_fw_post_reset(ptdev); > - if (ret) > - goto out_dev_exit; > - > atomic_set(&ptdev->reset.pending, 0); > - panthor_sched_post_reset(ptdev); > - > -out_dev_exit: > + panthor_sched_post_reset(ptdev, ret != 0); > drm_dev_exit(cookie); > > if (ret) { > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > index 6ea094b00cf9..fc43ff62c77d 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -2728,15 +2728,22 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev) > mutex_unlock(&sched->reset.lock); > } > > -void panthor_sched_post_reset(struct panthor_device *ptdev) > +void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed) > { > struct panthor_scheduler *sched = ptdev->scheduler; > struct panthor_group *group, *group_tmp; > > mutex_lock(&sched->reset.lock); > > - list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) > + 
list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) { > + /* Consider all previously running groups as terminated if the > + * reset failed. > + */ > + if (reset_failed) > + group->state = PANTHOR_CS_GROUP_TERMINATED; > + > panthor_group_start(group); > + } > > /* We're done resetting the GPU, clear the reset.in_progress bit so we can > * kick the scheduler. > @@ -2744,9 +2751,11 @@ void panthor_sched_post_reset(struct panthor_device *ptdev) > atomic_set(&sched->reset.in_progress, false); > mutex_unlock(&sched->reset.lock); > > - sched_queue_delayed_work(sched, tick, 0); > - > - sched_queue_work(sched, sync_upd); > + /* No need to queue a tick and update syncs if the reset failed. */ > + if (!reset_failed) { > + sched_queue_delayed_work(sched, tick, 0); > + sched_queue_work(sched, sync_upd); > + } > } > > static void group_sync_upd_work(struct work_struct *work) > diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h > index 66438b1f331f..3a30d2328b30 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.h > +++ b/drivers/gpu/drm/panthor/panthor_sched.h > @@ -40,7 +40,7 @@ void panthor_group_pool_destroy(struct panthor_file *pfile); > int panthor_sched_init(struct panthor_device *ptdev); > void panthor_sched_unplug(struct panthor_device *ptdev); > void panthor_sched_pre_reset(struct panthor_device *ptdev); > -void panthor_sched_post_reset(struct panthor_device *ptdev); > +void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed); > void panthor_sched_suspend(struct panthor_device *ptdev); > void panthor_sched_resume(struct panthor_device *ptdev); > > -- > 2.44.0 > -- ==================== | I would like to | | fix the world, | | but they're not | | giving me the | \ source code! / --------------- ¯\_(ツ)_/¯