Since runtime suspend is very harsh on GTT mmappings (they all get zapped
on suspend), keep the device awake while the buffer remains in the GTT
domain. However, userspace can control the domain, and although there is a
soft contract that writes must be flushed (e.g. for flushing scanouts and
fbc), we are intentionally lax with respect to read domains, allowing them
to persist for as long as is feasible.

We acquire a wakeref when using the buffer in the GTT domain so that all
subsequent operations on that object are fast, trying to avoid suspending
while the GTT is still in active use by userspace. To ensure that the
device can eventually suspend, we install a timer to expire the GTT
wakeref. So in effect we have just a fancy pm autosuspend that tries to
estimate the cost of restoring actively used GTT mmappings.

v2: Don't forget to setup/cleanup selftests.

References: https://bugs.freedesktop.org/show_bug.cgi?id=102490
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_debugfs.c              |   2 +
 drivers/gpu/drm/i915/i915_drv.h                  |  12 +++
 drivers/gpu/drm/i915/i915_gem.c                  | 122 +++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_object.h           |   5 +
 drivers/gpu/drm/i915/i915_gem_shrinker.c         |   4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_object.c |   2 +-
 drivers/gpu/drm/i915/selftests/mock_gem_device.c |   6 ++
 7 files changed, 134 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2f691cc7b563..9bd082957202 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2812,6 +2812,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	if (!HAS_RUNTIME_PM(dev_priv))
 		seq_puts(m, "Runtime power management not supported\n");
 
+	seq_printf(m, "GTT wakeref count: %d\n",
+		   atomic_read(&dev_priv->mm.gtt_wakeref.count));
 	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
 	seq_printf(m, "IRQs disabled: %s\n",
 		   yesno(!intel_irqs_enabled(dev_priv)));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 28ad5dadbb18..ebacd046736e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1487,6 +1487,18 @@ struct i915_gem_mm {
 	 */
 	struct list_head userfault_list;
 
+	/* List of all objects in gtt domain, holding a wakeref.
+	 * The list is reaped periodically, and protected by its own mutex.
+	 * Entries on this list do not carry a reference.
+	 */
+	struct {
+		struct mutex lock;
+		struct list_head list;
+		atomic_t count; /* -1 == no timeout scheduled */
+
+		struct delayed_work work;
+	} gtt_wakeref;
+
 	/**
 	 * List of objects which are pending destruction.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d98f7b25d395..253bb893c17e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	if (cpu_write_needs_clflush(obj))
@@ -667,11 +668,14 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	if (!(obj->base.write_domain & flush_domains))
 		return;
@@ -694,16 +698,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 
 	switch (obj->base.write_domain) {
 	case I915_GEM_DOMAIN_GTT:
-		if (!HAS_LLC(dev_priv)) {
-			intel_runtime_pm_get(dev_priv);
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
+		mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
+		if (!list_empty(&obj->mm.gtt_wakeref_link)) {
+			if (!HAS_LLC(dev_priv)) {
+				spin_lock_irq(&dev_priv->uncore.lock);
+				POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
+				spin_unlock_irq(&dev_priv->uncore.lock);
+			}
 
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+			intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+			list_del_init(&obj->mm.gtt_wakeref_link);
+		}
+		mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
@@ -808,7 +815,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 		goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -861,7 +868,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 		goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -3423,9 +3430,10 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asyncrhonously.
 	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.write_domain = 0;
 }
 
@@ -3478,7 +3486,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3502,6 +3510,49 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+static void wakeref_timeout(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work);
+	struct drm_i915_gem_object *obj, *on;
+	unsigned int count;
+
+	mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
+	count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0);
+	if (count) {
+		unsigned long timeout;
+
+		GEM_BUG_ON(count == -1);
+
+		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+			spin_lock_irq(&dev_priv->uncore.lock);
+			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+			spin_unlock_irq(&dev_priv->uncore.lock);
+		}
+
+		timeout = 0;
+		list_for_each_entry_safe(obj, on,
+					 &dev_priv->mm.gtt_wakeref.list,
+					 mm.gtt_wakeref_link) {
+			list_del_init(&obj->mm.gtt_wakeref_link);
+			if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
+				intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+				obj->base.write_domain = 0;
+				timeout++;
+			}
+			timeout++;
+		}
+
+		timeout = HZ * (ilog2(timeout) + 1) / 2;
+		schedule_delayed_work(&dev_priv->mm.gtt_wakeref.work,
+				      round_jiffies_up_relative(timeout));
+	} else {
+		intel_runtime_pm_put(dev_priv);
+		atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
+	}
+	mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
@@ -3513,6 +3564,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3526,6 +3578,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	mutex_lock(&i915->mm.gtt_wakeref.lock);
+	if (list_empty(&obj->mm.gtt_wakeref_link)) {
+		if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
+			intel_runtime_pm_get(i915);
+			schedule_delayed_work(&i915->mm.gtt_wakeref.work,
+					      round_jiffies_up_relative(HZ));
+		}
+		list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list);
+	}
+	mutex_unlock(&i915->mm.gtt_wakeref.lock);
+
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
@@ -3541,7 +3604,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3909,7 +3972,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -4258,6 +4321,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
+	INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4395,6 +4459,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		trace_i915_gem_object_destroy(obj);
 
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
@@ -4417,6 +4483,12 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
+		if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) {
+			mutex_lock(&i915->mm.gtt_wakeref.lock);
+			list_del(&obj->mm.gtt_wakeref_link);
+			mutex_unlock(&i915->mm.gtt_wakeref.lock);
+		}
+
 		if (obj->ops->release)
 			obj->ops->release(obj);
 
@@ -4549,6 +4621,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
+	drain_delayed_work(&dev_priv->mm.gtt_wakeref.work);
+	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
 	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
@@ -4891,6 +4965,15 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 	i915_gem_detect_bit_6_swizzle(dev_priv);
 }
 
+static void
+i915_gem_load_init__wakeref(struct drm_i915_private *i915)
+{
+	INIT_LIST_HEAD(&i915->mm.gtt_wakeref.list);
+	INIT_DELAYED_WORK(&i915->mm.gtt_wakeref.work, wakeref_timeout);
+	mutex_init(&i915->mm.gtt_wakeref.lock);
+	atomic_set(&i915->mm.gtt_wakeref.count, -1);
+}
+
 int
 i915_gem_load_init(struct drm_i915_private *dev_priv)
 {
@@ -4932,6 +5015,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (err)
 		goto err_priorities;
 
+	i915_gem_load_init__wakeref(dev_priv);
+
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
 	init_llist_head(&dev_priv->mm.free_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
@@ -4978,6 +5063,9 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	drain_delayed_work(&dev_priv->mm.gtt_wakeref.work);
+	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
+
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c30d8f808185..5797b75a804b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -177,6 +177,8 @@ struct drm_i915_gem_object {
 			struct mutex lock; /* protects this cache */
 		} get_page;
 
+		struct list_head gtt_wakeref_link;
+
 		/**
 		 * Advice: are the backing pages purgeable?
 		 */
@@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+					unsigned int flush_domains);
+
 #endif
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 77fb39808131..8d146584ee49 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
-	if (i915_gem_object_unbind(obj) == 0)
+	if (i915_gem_object_unbind(obj) == 0) {
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	}
 	return !READ_ONCE(obj->mm.pages);
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 8f011c447e41..f3f6588afbdb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		if (offset >= obj->base.size)
 			continue;
 
-		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 		cpu = kmap(p) + offset_in_page(offset);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 678723430d78..ca260e4658af 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -67,6 +67,9 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_fini(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
+	drain_delayed_work(&i915->mm.gtt_wakeref.work);
+	WARN_ON(atomic_read(&i915->mm.gtt_wakeref.count) != -1);
+
 	drain_workqueue(i915->wq);
 	i915_gem_drain_freed_objects(i915);
 
@@ -166,6 +169,7 @@ struct drm_i915_private *mock_gem_device(void)
 	drm_mode_config_init(&i915->drm);
 
 	mkwrite_device_info(i915)->gen = -1;
+	mkwrite_device_info(i915)->has_llc = true;
 
 	spin_lock_init(&i915->mm.object_stat_lock);
 	mock_uncore_init(i915);
@@ -177,6 +181,8 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->wq)
 		goto put_device;
 
+	i915_gem_load_init__wakeref(i915);
+
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
 	init_llist_head(&i915->mm.free_list);
 	INIT_LIST_HEAD(&i915->mm.unbound_list);
-- 
2.14.1
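For readers skimming the diff, the mechanism reduces to a small state
machine: the first object entering the GTT domain takes a runtime-pm
wakeref and arms a timer; each expiry flushes whatever entered the domain
since the last pass and re-arms; once a whole interval passes with no new
GTT-domain activity, the wakeref is dropped and the device may
autosuspend. Below is a simplified, single-threaded userspace model of
that behaviour; the names and the simulated pm/timer plumbing are
illustrative only and not i915 code (the patch itself uses
atomic_inc_and_test/atomic_xchg, a mutex-protected list and delayed_work,
and scales the next interval with the amount of work reaped).

/*
 * Simplified model of the deferred GTT wakeref: not i915 code, names and
 * the simulated pm/timer plumbing are invented for illustration.
 */
#include <stdio.h>
#include <stdbool.h>

#define MAX_OBJECTS 8

struct object {
	bool in_gtt_domain;	/* currently tracked on the wakeref list */
	bool gtt_write;		/* has an unflushed GTT write */
};

static struct {
	int count;		/* objects seen since last expiry; -1 == no timer armed */
	bool device_awake;	/* stands in for intel_runtime_pm_get()/put() */
	bool timer_armed;	/* stands in for the delayed work */
	struct object *list[MAX_OBJECTS];
	int nr;
} gtt_wakeref = { .count = -1 };

/* Userspace touches the object through its GTT mmap: keep the device awake. */
static void object_set_to_gtt_domain(struct object *obj, bool write)
{
	if (!obj->in_gtt_domain) {
		if (gtt_wakeref.count == -1) {	/* idle -> busy: take wakeref, arm timer */
			gtt_wakeref.device_awake = true;
			gtt_wakeref.timer_armed = true;
			gtt_wakeref.count = 0;
		}
		gtt_wakeref.count++;
		obj->in_gtt_domain = true;
		gtt_wakeref.list[gtt_wakeref.nr++] = obj;
	}
	if (write)
		obj->gtt_write = true;
}

/* Timer expiry: flush everything used since the last pass, then re-arm or idle. */
static void wakeref_timeout(void)
{
	int used = gtt_wakeref.count;

	gtt_wakeref.count = 0;
	if (used) {
		for (int i = 0; i < gtt_wakeref.nr; i++) {
			struct object *obj = gtt_wakeref.list[i];

			if (obj->gtt_write)
				printf("flushing dirty object %d\n", i);
			obj->gtt_write = false;
			obj->in_gtt_domain = false;
		}
		gtt_wakeref.nr = 0;
		printf("%d objects reaped, re-arming timer\n", used);
	} else {
		gtt_wakeref.device_awake = false;	/* intel_runtime_pm_put() */
		gtt_wakeref.timer_armed = false;
		gtt_wakeref.count = -1;
		printf("no GTT activity, wakeref released\n");
	}
}

int main(void)
{
	struct object a = {0}, b = {0};

	object_set_to_gtt_domain(&a, true);	/* dirty GTT write */
	object_set_to_gtt_domain(&b, false);	/* read-only GTT use */
	wakeref_timeout();	/* first expiry: flush, stay awake */
	wakeref_timeout();	/* second expiry: idle, drop the wakeref */
	return 0;
}

Running the model flushes the one dirty object on the first expiry and
only releases the wakeref on the second, idle pass, mirroring the "fancy
pm autosuspend" described in the commit message.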