Since runtime suspend is very harsh on GTT mmappings (they all get
zapped on suspend) keep the device awake while the buffer remains in
the GTT write domain (as we expect subsequent writes). We special case
writes here, as the write domain is more bounded than the read domains;
a buffer may remain in multiple read domains until it is written to,
but a write from the GTT must be flushed prior to using it elsewhere
(e.g. on the GPU). However, userspace can control the write-domain and
although there is a soft contract that writes must be flushed (for e.g.
flushing scanouts and fbc), in the worst case an idle buffer may keep
the device alive until the buffer is destroyed.
---
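As an illustration of the invariant only (not driver code): a minimal
stand-alone C model of the wakeref/write-domain pairing this patch
introduces. The names here (fake_device, fake_object, set_to_gtt_domain,
flush_gtt_writes) are hypothetical simplifications standing in for
intel_runtime_pm_get_noresume()/intel_runtime_pm_put() and the
obj->base.write_domain tracking in the real driver.

#include <assert.h>
#include <stdio.h>

#define DOMAIN_CPU (1u << 0)
#define DOMAIN_GTT (1u << 1)

struct fake_device { int wakeref; };	/* stands in for runtime PM */
struct fake_object {
	struct fake_device *dev;
	unsigned int write_domain;
};

/* Entering the GTT write domain takes a wakeref (without waking the
 * device, cf. intel_runtime_pm_get_noresume()); if the object already
 * holds the domain, the ref is already held, so don't take another. */
static void set_to_gtt_domain(struct fake_object *obj, int write)
{
	if (write && obj->write_domain != DOMAIN_GTT) {
		obj->dev->wakeref++;
		obj->write_domain = DOMAIN_GTT;
	}
}

/* Flushing GTT writes releases the wakeref (cf. intel_runtime_pm_put()
 * in flush_write_domain()); the destroy/shrink/pin paths call this so
 * the reference cannot outlive the object. */
static void flush_gtt_writes(struct fake_object *obj)
{
	if (obj->write_domain == DOMAIN_GTT)
		obj->dev->wakeref--;
	obj->write_domain = 0;
}

int main(void)
{
	struct fake_device dev = { 0 };
	struct fake_object obj = { &dev, DOMAIN_CPU };

	set_to_gtt_domain(&obj, 1);	/* GTT write => device held awake */
	assert(dev.wakeref == 1);

	flush_gtt_writes(&obj);		/* e.g. before GPU use or free */
	assert(dev.wakeref == 0);	/* device may runtime suspend again */

	printf("wakeref balanced: %d\n", dev.wakeref);
	return 0;
}

The flush_write_domain() calls added below on the destroy, shrinker and
pin paths exist to guarantee that final balancing put; without them an
idle but never-flushed buffer would hold the wakeref, which is the worst
case noted above.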
 drivers/gpu/drm/i915/i915_gem.c            | 12 +++++++++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 ++
 drivers/gpu/drm/i915/i915_gem_object.h     |  3 +++
 drivers/gpu/drm/i915/i915_gem_shrinker.c   |  4 +++-
 drivers/gpu/drm/i915/intel_lrc.c           |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  3 +++
 6 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4cc08bc518c..553cc09e9ab3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	if (cpu_write_needs_clflush(obj))
@@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
+void
 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	if (!(obj->base.write_domain & flush_domains))
 		return;
 
@@ -695,15 +698,14 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 	switch (obj->base.write_domain) {
 	case I915_GEM_DOMAIN_GTT:
 		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-			intel_runtime_pm_get(dev_priv);
 			spin_lock_irq(&dev_priv->uncore.lock);
 			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
 			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
 		}
 
 		intel_fb_obj_flush(obj,
 				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+		intel_runtime_pm_put(dev_priv);
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
@@ -3425,6 +3427,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.write_domain = 0;
 }
 
@@ -3555,6 +3558,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 	if (write) {
+		intel_runtime_pm_get_noresume(to_i915(obj->base.dev));
 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
 		obj->mm.dirty = true;
@@ -4394,6 +4398,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		trace_i915_gem_object_destroy(obj);
 
+		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8a9d37ac16d4..62c215eb38b7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1865,6 +1865,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	i915_gem_active_set(&vma->last_read[idx], req);
 	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 
+	if (obj->base.write_domain & I915_GEM_DOMAIN_GTT)
+		intel_runtime_pm_put(to_i915(obj->base.dev));
 	obj->base.write_domain = 0;
 	if (flags & EXEC_OBJECT_WRITE) {
 		obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c30d8f808185..f5f52c4090b0 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -421,5 +421,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains);
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 77fb39808131..71110b7d3ca0 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
-	if (i915_gem_object_unbind(obj) == 0)
+	if (i915_gem_object_unbind(obj) == 0) {
+		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	}
 	return !READ_ONCE(obj->mm.pages);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d89e1b8e1cc5..357eee6f907c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 		i915_ggtt_offset(ce->ring->vma);
 
 	ce->state->obj->mm.dirty = true;
+	flush_write_domain(ce->state->obj, ~0);
 
 	i915_gem_context_get(ctx);
 out:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cdf084ef5aae..571a5b1f4f54 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1321,6 +1321,8 @@ int intel_ring_pin(struct intel_ring *ring,
 	if (IS_ERR(addr))
 		goto err;
 
+	flush_write_domain(vma->obj, ~0);
+
 	ring->vaddr = addr;
 	return 0;
 
@@ -1516,6 +1518,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 			goto err;
 
 		ce->state->obj->mm.dirty = true;
+		flush_write_domain(ce->state->obj, ~0);
 	}
 
 	/* The kernel context is only used as a placeholder for flushing the
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx