On Fri, Aug 23, 2019 at 06:11:26PM +0100, Chris Wilson wrote: > obj->pin_global was original used as a means to keep the shrinker off > the active scanout, but we use the vma->pin_count itself for that and > the obj->frontbuffer to delay shrinking active framebuffers. The other > role that obj->pin_global gained was for spotting display objects inside > GEM and working harder to keep those coherent; for which we can again > simply inspect obj->frontbuffer directly. > > Coming up next, we will want to manipulate the pin_global counter > outside of the principle locks, so would need to make pin_global atomic. > However, since obj->frontbuffer is already managed atomically, it makes > sense to use that the primary key for display objects instead of having > pin_global. > > Ville pointed out the principle difference is that obj->frontbuffer is > set for as long as an intel_frambuffer is attached to an object, but > obj->pin_global was only raised for as long as the object was active. In > practice, this means that we consider the object as being on the scanout > for longer than is strictly required, causing us to be more proactive in > flushing -- though it should be true that we would have flushed > eventually when the back became the front, except that on the flip path > that flush is aync but when hit from another ioctl it will be > synchronous. > > v2: i915_gem_object_is_framebuffer() > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> Reviewed-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > --- > .../gpu/drm/i915/display/intel_frontbuffer.c | 13 +++++-- > drivers/gpu/drm/i915/gem/i915_gem_domain.c | 34 ++++++------------- > drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 +- > .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 -- > drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 15 +++----- > drivers/gpu/drm/i915/i915_debugfs.c | 12 ++----- > 6 files changed, 29 insertions(+), 50 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c > index 719379774fa5..fc40dc1fdbcc 100644 > --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c > +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c > @@ -220,11 +220,18 @@ static void frontbuffer_release(struct kref *ref) > { > struct intel_frontbuffer *front = > container_of(ref, typeof(*front), ref); > + struct drm_i915_gem_object *obj = front->obj; > + struct i915_vma *vma; > > - front->obj->frontbuffer = NULL; > - spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock); > + spin_lock(&obj->vma.lock); > + for_each_ggtt_vma(vma, obj) > + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; > + spin_unlock(&obj->vma.lock); > > - i915_gem_object_put(front->obj); > + obj->frontbuffer = NULL; > + spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock); > + > + i915_gem_object_put(obj); > kfree(front); > } > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c > index 9c58e8fac1d9..6af740a5e3db 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c > @@ -27,7 +27,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) > > void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) > { > - if (!READ_ONCE(obj->pin_global)) > + if (!i915_gem_object_is_framebuffer(obj)) > return; > > i915_gem_object_lock(obj); > @@ -422,12 +422,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > > assert_object_held(obj); > > - /* Mark the global pin early so that we account for the > - * display coherency whilst setting up the cache domains. > - */ > - obj->pin_global++; > - > - /* The display engine is not coherent with the LLC cache on gen6. As > + /* > + * The display engine is not coherent with the LLC cache on gen6. As > * a result, we make sure that the pinning that is about to occur is > * done with uncached PTEs. This is lowest common denominator for all > * chipsets. > @@ -439,12 +435,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > ret = i915_gem_object_set_cache_level(obj, > HAS_WT(to_i915(obj->base.dev)) ? > I915_CACHE_WT : I915_CACHE_NONE); > - if (ret) { > - vma = ERR_PTR(ret); > - goto err_unpin_global; > - } > + if (ret) > + return ERR_PTR(ret); > > - /* As the user may map the buffer once pinned in the display plane > + /* > + * As the user may map the buffer once pinned in the display plane > * (e.g. libkms for the bootup splash), we have to ensure that we > * always use map_and_fenceable for all scanout buffers. However, > * it may simply be too big to fit into mappable, in which case > @@ -461,22 +456,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); > if (IS_ERR(vma)) > - goto err_unpin_global; > + return vma; > > vma->display_alignment = max_t(u64, vma->display_alignment, alignment); > > __i915_gem_object_flush_for_display(obj); > > - /* It should now be out of any other write domains, and we can update > + /* > + * It should now be out of any other write domains, and we can update > * the domain values for our changes. > */ > obj->read_domains |= I915_GEM_DOMAIN_GTT; > > return vma; > - > -err_unpin_global: > - obj->pin_global--; > - return vma; > } > > static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) > @@ -514,12 +506,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) > > assert_object_held(obj); > > - if (WARN_ON(obj->pin_global == 0)) > - return; > - > - if (--obj->pin_global == 0) > - vma->display_alignment = I915_GTT_MIN_ALIGNMENT; > - > /* Bump the LRU to try and avoid premature eviction whilst flipping */ > i915_gem_object_bump_inactive_ggtt(obj); > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h > index 5efb9936e05b..29b9eddc4c7f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h > @@ -406,7 +406,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) > if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) > return true; > > - return obj->pin_global; /* currently in use by HW, keep flushed */ > + /* Currently in use by HW (display engine)? Keep flushed. */ > + return i915_gem_object_is_framebuffer(obj); > } > > static inline void __start_cpu_write(struct drm_i915_gem_object *obj) > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > index ede0eb4218a8..13b9dc0e1a89 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > @@ -152,8 +152,6 @@ struct drm_i915_gem_object { > > /** Count of VMA actually bound by this object */ > atomic_t bind_count; > - /** Count of how many global VMA are currently pinned for use by HW */ > - unsigned int pin_global; > > struct { > struct mutex lock; /* protects the pages and their use */ > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > index edd21d14e64f..4e55cfc2b0dc 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > @@ -61,7 +61,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) > if (!i915_gem_object_is_shrinkable(obj)) > return false; > > - /* Only report true if by unbinding the object and putting its pages > + /* > + * Only report true if by unbinding the object and putting its pages > * we can actually make forward progress towards freeing physical > * pages. > * > @@ -72,16 +73,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) > if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count)) > return false; > > - /* If any vma are "permanently" pinned, it will prevent us from > - * reclaiming the obj->mm.pages. We only allow scanout objects to claim > - * a permanent pin, along with a few others like the context objects. > - * To simplify the scan, and to avoid walking the list of vma under the > - * object, we just check the count of its permanently pinned. > - */ > - if (READ_ONCE(obj->pin_global)) > - return false; > - > - /* We can only return physical pages to the system if we can either > + /* > + * We can only return physical pages to the system if we can either > * discard the contents (because the user has marked them as being > * purgeable) or if we can move their contents out to swap. > */ > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index e103fcba6435..4aa7caa3aa28 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -77,11 +77,6 @@ static int i915_capabilities(struct seq_file *m, void *data) > return 0; > } > > -static char get_pin_flag(struct drm_i915_gem_object *obj) > -{ > - return obj->pin_global ? 'p' : ' '; > -} > - > static char get_tiling_flag(struct drm_i915_gem_object *obj) > { > switch (i915_gem_object_get_tiling(obj)) { > @@ -140,9 +135,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) > struct i915_vma *vma; > int pin_count = 0; > > - seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s", > + seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s", > &obj->base, > - get_pin_flag(obj), > get_tiling_flag(obj), > get_global_flag(obj), > get_pin_mapped_flag(obj), > @@ -221,8 +215,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) > seq_printf(m, " (pinned x %d)", pin_count); > if (obj->stolen) > seq_printf(m, " (stolen: %08llx)", obj->stolen->start); > - if (obj->pin_global) > - seq_printf(m, " (global)"); > + if (i915_gem_object_is_framebuffer(obj)) > + seq_printf(m, " (fb)"); > > engine = i915_gem_object_last_write_engine(obj); > if (engine) > -- > 2.23.0 -- Ville Syrjälä Intel _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx