On Fri, Sep 01, 2017 at 01:16:06PM +0100, Chris Wilson wrote:
> Since runtime suspend is very harsh on GTT mmappings (they all get
> zapped on suspend) keep the device awake while the buffer remains in
> the GTT domain. However, userspace can control the domain and
> although there is a soft contract that writes must be flushed (for e.g.
> flushing scanouts and fbc), we are intentionally lax with respect to read
> domains, allowing them to persist for as long as is feasible.
>
> We acquire a wakeref when using the buffer in the GEM domain so that all
> subsequent operations on that object are fast, trying to avoid
> suspending while the GTT is still in active use by userspace. To ensure
> that the device can eventually suspend, we install a timer and expire the
> GTT wakeref. So in effect we have just a fancy pm autosuspend that tries
> to estimate the cost of restoring actively used GTT mmapping.

Please tag with for-CI or something like that when throwing patches at
the shards :-) At least that's what I'm assuming, given the lack of an
sob and a revision log of the changes ...

Thanks,
Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |   2 +
>  drivers/gpu/drm/i915/i915_drv.h          |  11 ++++
>  drivers/gpu/drm/i915/i915_gem.c          | 103 ++++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_gem_object.h   |   5 ++
>  drivers/gpu/drm/i915/i915_gem_shrinker.c |   4 +-
>  drivers/gpu/drm/i915/intel_lrc.c         |   1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +
>  7 files changed, 118 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 48572b157222..dbb07612aa5a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
> 	if (!HAS_RUNTIME_PM(dev_priv))
> 		seq_puts(m, "Runtime power management not supported\n");
>
> +	seq_printf(m, "GTT wakeref count: %d\n",
> +		   atomic_read(&dev_priv->mm.gtt_wakeref.count));
> 	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
> 	seq_printf(m, "IRQs disabled: %s\n",
> 		   yesno(!intel_irqs_enabled(dev_priv)));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0383e879a315..14dcf6614f3c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1460,6 +1460,17 @@ struct i915_gem_mm {
> 	 */
> 	struct list_head userfault_list;
>
> +	/* List of all objects in gtt domain, holding a wakeref.
> +	 * The list is reaped periodically, and protected by its own mutex.
> +	 */
> +	struct {
> +		struct mutex lock;
> +		struct list_head list;
> +		atomic_t count;
> +
> +		struct delayed_work work;
> +	} gtt_wakeref;
> +
> 	/**
> 	 * List of objects which are pending destruction.
> 	 */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e4cc08bc518c..09baf80889e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>
> static void __start_cpu_write(struct drm_i915_gem_object *obj)
> {
> +	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
> 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> 	if (cpu_write_needs_clflush(obj))
> @@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
> 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
> }
>
> -static void
> +void
> flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
> {
> 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>
> +	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
> 	if (!(obj->base.write_domain & flush_domains))
> 		return;
>
> @@ -694,16 +697,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>
> 	switch (obj->base.write_domain) {
> 	case I915_GEM_DOMAIN_GTT:
> -		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> -			intel_runtime_pm_get(dev_priv);
> -			spin_lock_irq(&dev_priv->uncore.lock);
> -			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> -			spin_unlock_irq(&dev_priv->uncore.lock);
> -			intel_runtime_pm_put(dev_priv);
> -		}
> +		mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> +		if (!list_empty(&obj->mm.gtt_wakeref_link)) {
> +			if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> +				spin_lock_irq(&dev_priv->uncore.lock);
> +				POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> +				spin_unlock_irq(&dev_priv->uncore.lock);
> +			}
>
> -		intel_fb_obj_flush(obj,
> -				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +			intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +
> +			list_del_init(&obj->mm.gtt_wakeref_link);
> +		}
> +		mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
> 		break;
>
> 	case I915_GEM_DOMAIN_CPU:
> @@ -3425,6 +3431,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
> 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> 	if (obj->cache_dirty)
> 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
> +	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
> 	obj->base.write_domain = 0;
> }
>
> @@ -3501,6 +3508,49 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> 	return 0;
> }
>
> +static void wakeref_timeout(struct work_struct *work)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work);
> +	struct drm_i915_gem_object *obj, *on;
> +	unsigned int count;
> +
> +	mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> +	count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0);
> +	if (count) {
> +		unsigned long timeout;
> +
> +		GEM_BUG_ON(count == -1);
> +
> +		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> +			spin_lock_irq(&dev_priv->uncore.lock);
> +			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> +			spin_unlock_irq(&dev_priv->uncore.lock);
> +		}
>
> +		count = 0;
> +		list_for_each_entry_safe(obj, on,
> +					 &dev_priv->mm.gtt_wakeref.list,
> +					 mm.gtt_wakeref_link) {
> +			list_del_init(&obj->mm.gtt_wakeref_link);
> +			if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
> +				intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +				obj->base.write_domain = 0;
> +			}
> +			count++;
> +		}
>
> +		timeout = HZ * (ilog2(count) + 1) / 2;
> +		mod_delayed_work(system_wq,
> +				 &dev_priv->mm.gtt_wakeref.work,
> +				 round_jiffies_up_relative(timeout));
> +	} else {
> +		intel_runtime_pm_put(dev_priv);
> +		atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> +	}
> +	mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
> +}
> +
> /**
>  * Moves a single object to the GTT read, and possibly write domain.
>  * @obj: object to act on
> @@ -3512,6 +3562,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> int
> i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> {
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> 	int ret;
>
> 	lockdep_assert_held(&obj->base.dev->struct_mutex);
> @@ -3525,6 +3576,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> 	if (ret)
> 		return ret;
>
> +	mutex_lock(&i915->mm.gtt_wakeref.lock);
> +	if (list_empty(&obj->mm.gtt_wakeref_link)) {
> +		if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
> +			intel_runtime_pm_get(i915);
> +			schedule_delayed_work(&i915->mm.gtt_wakeref.work,
> +					      round_jiffies_up_relative(HZ));
> +		}
> +		list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list);
> +	}
> +	mutex_unlock(&i915->mm.gtt_wakeref.lock);
> +
> 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> 		return 0;
>
> @@ -4257,6 +4319,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>
> 	INIT_LIST_HEAD(&obj->global_link);
> 	INIT_LIST_HEAD(&obj->userfault_link);
> +	INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);
> 	INIT_LIST_HEAD(&obj->vma_list);
> 	INIT_LIST_HEAD(&obj->lut_list);
> 	INIT_LIST_HEAD(&obj->batch_pool_link);
> @@ -4394,6 +4457,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>
> 		trace_i915_gem_object_destroy(obj);
>
> +		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> +
> +		if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) {
> +			mutex_lock(&i915->mm.gtt_wakeref.lock);
> +			list_del(&obj->mm.gtt_wakeref_link);
> +			mutex_unlock(&i915->mm.gtt_wakeref.lock);
> +		}
> +
> 		GEM_BUG_ON(i915_gem_object_is_active(obj));
> 		list_for_each_entry_safe(vma, vn,
> 					 &obj->vma_list, obj_link) {
> @@ -4548,6 +4619,9 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
> 	int ret;
>
> 	intel_runtime_pm_get(dev_priv);
> +	while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> +		;
> +	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
> 	intel_suspend_gt_powersave(dev_priv);
>
> 	mutex_lock(&dev->struct_mutex);
> @@ -4932,6 +5006,11 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
> 	if (err)
> 		goto err_priorities;
>
> +	INIT_LIST_HEAD(&dev_priv->mm.gtt_wakeref.list);
> +	INIT_DELAYED_WORK(&dev_priv->mm.gtt_wakeref.work, wakeref_timeout);
> +	mutex_init(&dev_priv->mm.gtt_wakeref.lock);
> +	atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> +
> 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
> 	init_llist_head(&dev_priv->mm.free_list);
> 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> @@ -4978,6 +5057,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
> 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
> 	mutex_unlock(&dev_priv->drm.struct_mutex);
>
> +	while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> +		;
> +	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
> +
> 	kmem_cache_destroy(dev_priv->priorities);
> 	kmem_cache_destroy(dev_priv->dependencies);
> 	kmem_cache_destroy(dev_priv->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index c30d8f808185..3ca13edf32b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -177,6 +177,8 @@ struct drm_i915_gem_object {
> 		struct mutex lock; /* protects this cache */
> 	} get_page;
>
> +	struct list_head gtt_wakeref_link;
> +
> 	/**
> 	 * Advice: are the backing pages purgeable?
> 	 */
> @@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
> 					 unsigned int cache_level);
> void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
>
> +void
> +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains);
> +
> #endif
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 77fb39808131..71110b7d3ca0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>
> static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
> {
> -	if (i915_gem_object_unbind(obj) == 0)
> +	if (i915_gem_object_unbind(obj) == 0) {
> +		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> 		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> +	}
> 	return !READ_ONCE(obj->mm.pages);
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d89e1b8e1cc5..357eee6f907c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
> 		i915_ggtt_offset(ce->ring->vma);
>
> 	ce->state->obj->mm.dirty = true;
> +	flush_write_domain(ce->state->obj, ~0);
>
> 	i915_gem_context_get(ctx);
> out:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index cdf084ef5aae..571a5b1f4f54 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1321,6 +1321,8 @@ int intel_ring_pin(struct intel_ring *ring,
> 	if (IS_ERR(addr))
> 		goto err;
>
> +	flush_write_domain(vma->obj, ~0);
> +
> 	ring->vaddr = addr;
> 	return 0;
>
> @@ -1516,6 +1518,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
> 			goto err;
>
> 		ce->state->obj->mm.dirty = true;
> +		flush_write_domain(ce->state->obj, ~0);
> 	}
>
> 	/* The kernel context is only used as a placeholder for flushing the
> --
> 2.14.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
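
For reference, the "fancy pm autosuspend" the commit message compares itself
to is the kernel's stock runtime-PM autosuspend machinery. The sketch below
shows that standard pattern in a hypothetical driver (the my_driver_*
functions and the fixed 100ms delay are illustrative only and are not part of
the patch above); the patch instead keeps its own per-object list plus
delayed work so the expiry interval can scale with how many objects are still
in the GTT domain.

#include <linux/pm_runtime.h>

/* Hypothetical driver code showing the stock runtime-PM autosuspend idiom. */
static void my_driver_enable_autosuspend(struct device *dev)
{
	/* Only allow runtime suspend after 100ms of inactivity. */
	pm_runtime_set_autosuspend_delay(dev, 100);
	pm_runtime_use_autosuspend(dev);
}

static int my_driver_touch_hw(struct device *dev)
{
	int err;

	/* Take a wakeref, resuming the device if it was suspended. */
	err = pm_runtime_get_sync(dev);
	if (err < 0) {
		pm_runtime_put_noidle(dev);
		return err;
	}

	/* ... poke the hardware ... */

	/* Drop the wakeref; suspend is deferred by the autosuspend delay. */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
	return 0;
}

In the patch, wakeref_timeout() plays the role of the autosuspend timer,
except that its delay grows with ilog2() of the number of GTT-domain objects
it had to flush on expiry.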