Since runtime suspend is very harsh on GTT mmappings (they all get zapped on suspend) keep the device awake while the buffer remains in the GTT domain. However, userspace can control the domain and although there is a soft contract that writes must be flushed (for e.g. flushing scanouts and fbc), we are intentionally lax with respect to read domains, allowing them to persist for as long as is feasible. We acquire a wakeref when using the buffer in the GEM domain so that all subsequent operations on that object are fast, trying to avoid suspending while the GTT is still in active use by userspace. To ensure that the device can eventually suspend, we install a timer and expire the GTT wakeref. So in effect we have just a fancy pm autosuspend that tries to estimate the cost of restoring actively used GTT mmapping. --- drivers/gpu/drm/i915/i915_debugfs.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 11 ++++ drivers/gpu/drm/i915/i915_gem.c | 103 ++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_object.h | 5 ++ drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 3 + 7 files changed, 118 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 48572b157222..dbb07612aa5a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) if (!HAS_RUNTIME_PM(dev_priv)) seq_puts(m, "Runtime power management not supported\n"); + seq_printf(m, "GTT wakeref count: %d\n", + atomic_read(&dev_priv->mm.gtt_wakeref.count)); seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake)); seq_printf(m, "IRQs disabled: %s\n", yesno(!intel_irqs_enabled(dev_priv))); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0383e879a315..14dcf6614f3c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1460,6 +1460,17 @@ struct i915_gem_mm { */ struct list_head userfault_list; + /* List of all objects in gtt domain, holding a wakeref. + * The list is reaped periodically, and protected by its own mutex. + */ + struct { + struct mutex lock; + struct list_head list; + atomic_t count; + + struct delayed_work work; + } gtt_wakeref; + /** * List of objects which are pending destruction. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e4cc08bc518c..09baf80889e8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) static void __start_cpu_write(struct drm_i915_gem_object *obj) { + GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; if (cpu_write_needs_clflush(obj)) @@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } -static void +void flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + lockdep_assert_held(&dev_priv->drm.struct_mutex); + if (!(obj->base.write_domain & flush_domains)) return; @@ -694,16 +697,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) switch (obj->base.write_domain) { case I915_GEM_DOMAIN_GTT: - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { - intel_runtime_pm_get(dev_priv); - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } + mutex_lock(&dev_priv->mm.gtt_wakeref.lock); + if (!list_empty(&obj->mm.gtt_wakeref_link)) { + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + } - intel_fb_obj_flush(obj, - fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + + list_del_init(&obj->mm.gtt_wakeref_link); + } + mutex_unlock(&dev_priv->mm.gtt_wakeref.lock); break; case I915_GEM_DOMAIN_CPU: @@ -3425,6 +3431,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); if (obj->cache_dirty) i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); + GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); obj->base.write_domain = 0; } @@ -3501,6 +3508,49 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) return 0; } +static void wakeref_timeout(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work); + struct drm_i915_gem_object *obj, *on; + unsigned int count; + + mutex_lock(&dev_priv->mm.gtt_wakeref.lock); + count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0); + if (count) { + unsigned long timeout; + + GEM_BUG_ON(count == -1); + + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + } + + count = 0; + list_for_each_entry_safe(obj, on, + &dev_priv->mm.gtt_wakeref.list, + mm.gtt_wakeref_link) { + list_del_init(&obj->mm.gtt_wakeref_link); + if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) { + intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + obj->base.write_domain = 0; + } + count++; + } + + timeout = HZ * (ilog2(count) + 1) / 2; + mod_delayed_work(system_wq, + &dev_priv->mm.gtt_wakeref.work, + round_jiffies_up_relative(timeout)); + } else { + intel_runtime_pm_put(dev_priv); + atomic_set(&dev_priv->mm.gtt_wakeref.count, -1); + } + mutex_unlock(&dev_priv->mm.gtt_wakeref.lock); +} + /** * Moves a single object to the GTT read, and possibly write domain. * @obj: object to act on @@ -3512,6 +3562,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3525,6 +3576,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; + mutex_lock(&i915->mm.gtt_wakeref.lock); + if (list_empty(&obj->mm.gtt_wakeref_link)) { + if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) { + intel_runtime_pm_get(i915); + schedule_delayed_work(&i915->mm.gtt_wakeref.work, + round_jiffies_up_relative(HZ)); + } + list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list); + } + mutex_unlock(&i915->mm.gtt_wakeref.lock); + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) return 0; @@ -4257,6 +4319,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_link); INIT_LIST_HEAD(&obj->userfault_link); + INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4394,6 +4457,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, trace_i915_gem_object_destroy(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + + if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) { + mutex_lock(&i915->mm.gtt_wakeref.lock); + list_del(&obj->mm.gtt_wakeref_link); + mutex_unlock(&i915->mm.gtt_wakeref.lock); + } + GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) { @@ -4548,6 +4619,9 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) int ret; intel_runtime_pm_get(dev_priv); + while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work)) + ; + WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1); intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); @@ -4932,6 +5006,11 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (err) goto err_priorities; + INIT_LIST_HEAD(&dev_priv->mm.gtt_wakeref.list); + INIT_DELAYED_WORK(&dev_priv->mm.gtt_wakeref.work, wakeref_timeout); + mutex_init(&dev_priv->mm.gtt_wakeref.lock); + atomic_set(&dev_priv->mm.gtt_wakeref.count, -1); + INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); init_llist_head(&dev_priv->mm.free_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); @@ -4978,6 +5057,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) WARN_ON(!list_empty(&dev_priv->gt.timelines)); mutex_unlock(&dev_priv->drm.struct_mutex); + while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work)) + ; + WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1); + kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); kmem_cache_destroy(dev_priv->requests); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index c30d8f808185..3ca13edf32b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -177,6 +177,8 @@ struct drm_i915_gem_object { struct mutex lock; /* protects this cache */ } get_page; + struct list_head gtt_wakeref_link; + /** * Advice: are the backing pages purgeable? */ @@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); +void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains); + #endif diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 77fb39808131..71110b7d3ca0 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) { - if (i915_gem_object_unbind(obj) == 0) + if (i915_gem_object_unbind(obj) == 0) { + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + } return !READ_ONCE(obj->mm.pages); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d89e1b8e1cc5..357eee6f907c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine, i915_ggtt_offset(ce->ring->vma); ce->state->obj->mm.dirty = true; + flush_write_domain(ce->state->obj, ~0); i915_gem_context_get(ctx); out: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cdf084ef5aae..571a5b1f4f54 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1321,6 +1321,8 @@ int intel_ring_pin(struct intel_ring *ring, if (IS_ERR(addr)) goto err; + flush_write_domain(vma->obj, ~0); + ring->vaddr = addr; return 0; @@ -1516,6 +1518,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, goto err; ce->state->obj->mm.dirty = true; + flush_write_domain(ce->state->obj, ~0); } /* The kernel context is only used as a placeholder for flushing the -- 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx