We have a number of tasks that we would like to run when idle, as we park
the GPU into a powersaving mode. A few of those tasks use the global idle
point as a convenient moment at which all previous execution is known to
have been retired (and so we know that the GPU is not still touching
random user memory). However, on a busy system we are unlikely to see
global idle points, and would prefer a much more incremental scheme in
which we can retire work after all currently queued execution has
completed. Enter the idle barrier and idle tasks.

To determine a point in the future when all current tasks are complete, we
schedule a new low priority request that will be executed after all
current work is complete, and impose a barrier so that it is ordered
before all future work. We therefore know that when we retire that
barrier, the GPU is no longer touching any memory released before the
barrier was submitted, allowing us to run a set of idle tasks clear of any
dangling GPU references.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h      |  5 ++
 drivers/gpu/drm/i915/i915_gem.c      | 90 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c  |  9 +++
 drivers/gpu/drm/i915/i915_timeline.c |  3 +
 4 files changed, 107 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d072f3369ee1..5ca77e2e53fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2021,6 +2021,9 @@ struct drm_i915_private {
	 */
	struct delayed_work idle_work;

+	struct i915_gem_active idle_barrier;
+	struct list_head idle_tasks;
+
	ktime_t last_init_time;

	struct i915_vma *scratch;
@@ -3040,6 +3043,8 @@ void i915_gem_fini(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
			   unsigned int flags, long timeout);
+void i915_gem_add_idle_task(struct drm_i915_private *i915,
+			    struct i915_gem_active *idle);
 int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 101a0f644787..0a8bcf6e7098 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -141,6 +141,15 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
	return 0;
 }

+static void call_idle_tasks(struct list_head *tasks)
+{
+	struct i915_gem_active *tsk, *tn;
+
+	list_for_each_entry_safe(tsk, tn, tasks, link)
+		tsk->retire(tsk, NULL);
+	INIT_LIST_HEAD(tasks);
+}
+
 static u32 __i915_gem_park(struct drm_i915_private *i915)
 {
	intel_wakeref_t wakeref;
@@ -169,6 +178,8 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
	 */
	synchronize_irq(i915->drm.irq);

+	call_idle_tasks(&i915->gt.idle_tasks);
+
	intel_engines_park(i915);
	i915_timelines_park(i915);

@@ -2906,6 +2917,81 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
	return active;
 }

+static void idle_barrier(struct drm_i915_private *i915)
+{
+	struct i915_gt_timelines *gt = &i915->gt.timelines;
+	struct i915_timeline *tl;
+	struct i915_request *rq;
+	int err = 0;
+
+	if (list_empty(&i915->gt.idle_tasks))
+		return;
+
+	if (!i915->gt.active_requests) {
+		call_idle_tasks(&i915->gt.idle_tasks);
+		return;
+	}
+
+	/* Keep just one idle barrier in flight, amalgamating tasks instead */
+	if (i915_gem_active_isset(&i915->gt.idle_barrier))
+		return;
+
+	GEM_TRACE("adding idle barrier\n");
+
+	rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
+	if (IS_ERR(rq))
+		return;
+
+	/* run after all current requests have executed, but before any new */
+	mutex_lock(&gt->mutex);
+	list_for_each_entry(tl, &gt->active_list, link) {
+		struct i915_request *last;
+
+		if (tl == rq->timeline)
+			continue;
+
+		err = i915_timeline_set_barrier(tl, rq);
+		if (err == -EEXIST)
+			continue;
+		if (err)
+			break;
+
+		last = i915_gem_active_raw(&tl->last_request,
+					   &i915->drm.struct_mutex);
+		if (!last)
+			continue;
+
+		mutex_unlock(&gt->mutex); /* allocation ahead! */
+		err = i915_request_await_dma_fence(rq, &last->fence);
+		mutex_lock(&gt->mutex);
+		if (err)
+			break;
+
+		/* restart after reacquiring the lock */
+		tl = list_entry(&gt->active_list, typeof(*tl), link);
+	}
+	mutex_unlock(&gt->mutex);
+
+	if (err == 0) {
+		list_splice_init(&i915->gt.idle_tasks, &rq->active_list);
+		i915_gem_active_set(&i915->gt.idle_barrier, rq);
+	}
+
+	i915_request_add(rq);
+}
+
+void i915_gem_add_idle_task(struct drm_i915_private *i915,
+			    struct i915_gem_active *task)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_TRACE("adding idle task hint:%pS\n", task->retire);
+
+	if (i915->gt.active_requests)
+		list_add(&task->link, &i915->gt.idle_tasks);
+	else
+		task->retire(task, NULL);
+}
+
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
@@ -2916,6 +3002,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
	/* Come back later if the device is busy... */
	if (mutex_trylock(&dev->struct_mutex)) {
		i915_retire_requests(dev_priv);
+		idle_barrier(dev_priv);
		mutex_unlock(&dev->struct_mutex);
	}

@@ -5182,6 +5269,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);
+	GEM_BUG_ON(!list_empty(&dev_priv->gt.idle_tasks));

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
@@ -5302,6 +5390,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
+	INIT_LIST_HEAD(&dev_priv->gt.idle_tasks);
+	init_request_active(&dev_priv->gt.idle_barrier, NULL);

	i915_gem_init__mm(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c09a6644a2ab..b397155fe8a7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -532,6 +532,11 @@ static int add_barrier(struct i915_request *rq, struct i915_gem_active *active)
	return barrier ?
		i915_request_await_dma_fence(rq, &barrier->fence) : 0;
 }

+static int add_idle_barrier(struct i915_request *rq)
+{
+	return add_barrier(rq, &rq->i915->gt.idle_barrier);
+}
+
 static int add_timeline_barrier(struct i915_request *rq)
 {
	return add_barrier(rq, &rq->timeline->barrier);
@@ -679,6 +684,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
	 */
	rq->head = rq->ring->emit;

+	ret = add_idle_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
	ret = add_timeline_barrier(rq);
	if (ret)
		goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 8f5c57304064..60b2e1c3abf4 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -270,6 +270,9 @@ int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
	/* Must maintain ordering wrt existing barriers */
	old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex);
	if (old) {
+		if (old == rq)
+			return -EEXIST;
+
		err = i915_request_await_dma_fence(rq, &old->fence);
		if (err)
			return err;
-- 
2.20.1
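
For reviewers, a minimal sketch of how a caller might use the new
interface. This is illustrative only and not part of the patch: the
hypothetical_* names are invented, and it assumes the usual i915 headers
plus <linux/slab.h>. A caller embeds an i915_gem_active in its own state,
points the retire callback at the deferred cleanup, and queues it with
i915_gem_add_idle_task() while holding struct_mutex; the callback then
runs either immediately (GPU already idle, rq == NULL) or once the idle
barrier request retires.

/*
 * Hypothetical example, not part of this patch: defer freeing a cache
 * until the GPU can no longer be referencing its backing storage.
 */
struct hypothetical_cache {
	struct i915_gem_active idle;
	void *pages;
};

static void hypothetical_cache_retire(struct i915_gem_active *active,
				      struct i915_request *rq)
{
	struct hypothetical_cache *cache =
		container_of(active, typeof(*cache), idle);

	/*
	 * Called either directly (GPU already idle, rq == NULL) or when
	 * the idle barrier retires; either way no request submitted
	 * before the task was queued can still be using the pages.
	 */
	kfree(cache->pages);
	kfree(cache);
}

static void hypothetical_cache_release(struct drm_i915_private *i915,
				       struct hypothetical_cache *cache)
{
	lockdep_assert_held(&i915->drm.struct_mutex);

	init_request_active(&cache->idle, hypothetical_cache_retire);
	i915_gem_add_idle_task(i915, &cache->idle);
}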