On Wed, Jun 27, 2018 at 04:34:27PM +0200, Lucas Stach wrote: > When the hangcheck handler was replaced by the DRM scheduler timeout > handling we dropped the forward progress check, as this might allow > clients to hog the GPU for a long time with a big job. > > It turns out that even reasonably well behaved clients like the > Armada Xorg driver occasionally trip over the 500ms timeout. Bring > back the forward progress check to get rid of the userspace regression. > > We would still like to fix userspace to submit smaller batches > if possible, but that is for another day. > > Fixes: 6d7a20c07760 (drm/etnaviv: replace hangcheck with scheduler timeout) > Signed-off-by: Lucas Stach <l.stach@xxxxxxxxxxxxxx> Tested-by: Russell King <rmk+kernel@xxxxxxxxxxxxxxx> Thanks. > --- > drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 3 +++ > drivers/gpu/drm/etnaviv/etnaviv_sched.c | 24 ++++++++++++++++++++++++ > 2 files changed, 27 insertions(+) > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > index dd430f0f8ff5..90f17ff7888e 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > @@ -131,6 +131,9 @@ struct etnaviv_gpu { > struct work_struct sync_point_work; > int sync_point_event; > > + /* hang detection */ > + u32 hangcheck_dma_addr; > + > void __iomem *mmio; > int irq; > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > index a74eb57af15b..50d6b88cb7aa 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > @@ -10,6 +10,7 @@ > #include "etnaviv_gem.h" > #include "etnaviv_gpu.h" > #include "etnaviv_sched.h" > +#include "state.xml.h" > > static int etnaviv_job_hang_limit = 0; > module_param_named(job_hang_limit, etnaviv_job_hang_limit, int , 0444); > @@ -85,6 +86,29 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) > { > struct etnaviv_gem_submit *submit = 
to_etnaviv_submit(sched_job); > struct etnaviv_gpu *gpu = submit->gpu; > + u32 dma_addr; > + int change; > + > + /* > + * If the GPU managed to complete this job's fence, the timeout is > + * spurious. Bail out. > + */ > + if (fence_completed(gpu, submit->out_fence->seqno)) > + return; > + > + /* > + * If the GPU is still making forward progress on the front-end (which > + * should never loop) we shift out the timeout to give it a chance to > + * finish the job. > + */ > + dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); > + change = dma_addr - gpu->hangcheck_dma_addr; > + if (change < 0 || change > 16) { > + gpu->hangcheck_dma_addr = dma_addr; > + schedule_delayed_work(&sched_job->work_tdr, > + sched_job->sched->timeout); > + return; > + } > > /* block scheduler */ > kthread_park(gpu->sched.thread); > -- > 2.17.1 > -- RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ FTTC broadband for 0.8mile line in suburbia: sync at 13.8Mbps down 630kbps up According to speedtest.net: 13Mbps down 490kbps up _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel