On Fri, Jun 03, 2016 at 05:55:28PM +0100, Chris Wilson wrote:
> We are motivated to avoid using a bitfield for obj->active for a couple
> of reasons. Firstly, we wish to document our lockless read of obj->active
> using READ_ONCE inside i915_gem_busy_ioctl() and that requires an
> integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code
> when presented with a bitfield and that shows up high on the profiles of
> request tracking (mainly due to excess memory traffic as it converts
> the bitfield to a register and back and generates frequent AGI in the
> process).

AGI = address generator interlock? I guess gcc first dynamically computes
the address, then loads the right blocks, frobs them and stores the blocks
back. Please elaborate (in the commit message), but if it's indeed this
silly, this makes sense.
-Daniel
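For illustration only, here is a minimal userspace sketch of the two layouts
the commit message contrasts. It is a toy model, not the driver code: the
struct and helper names (obj_bitfield, obj_flags, set_active, is_active) are
made up for the example, and READ_ONCE is reduced to a plain volatile access
standing in for the real <linux/compiler.h> macro. The point it shows is that
READ_ONCE needs an addressable integral word, which a bitfield member cannot
provide, while an explicit flags word turns the set/test operations into plain
shifts and masks.

/*
 * Toy sketch, not the i915 code: contrasts the old bitfield layout with
 * an integral flags word.  READ_ONCE is simplified to a volatile access.
 */
#include <stdio.h>

#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

#define NUM_ENGINES 4

struct obj_bitfield {
	unsigned int active:NUM_ENGINES;	/* old layout */
};

#define BO_ACTIVE_SHIFT 0
#define BO_ACTIVE_MASK ((1ul << NUM_ENGINES) - 1)

struct obj_flags {
	unsigned long flags;			/* new layout */
};

static void set_active(struct obj_flags *obj, int engine)
{
	obj->flags |= 1ul << (engine + BO_ACTIVE_SHIFT);
}

static unsigned long is_active(const struct obj_flags *obj)
{
	/* fine: the whole flags word is addressable and integral */
	return (READ_ONCE(obj->flags) >> BO_ACTIVE_SHIFT) & BO_ACTIVE_MASK;
}

int main(void)
{
	struct obj_bitfield a = { .active = 0 };
	struct obj_flags b = { .flags = 0 };

	a.active |= 1 << 2;	/* read-modify-write of the containing word */
	set_active(&b, 2);

	/* READ_ONCE(a.active) would not compile: a bitfield has no address */
	printf("bitfield=%#x flags=%#lx\n", a.active, is_active(&b));
	return 0;
}

Comparing the -O2 assembly of the two set/test paths is probably the quickest
way to confirm (or refute) the extra masking traffic the commit message blames
on the bitfield.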
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h            | 31 +++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem.c            | 16 +++++++--------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
>  drivers/gpu/drm/i915/i915_gem_shrinker.c   |  5 +++--
>  drivers/gpu/drm/i915/i915_gem_userptr.c    |  2 +-
>  6 files changed, 48 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 355bbf895c22..9154919fdd56 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
> 
>  static char get_active_flag(struct drm_i915_gem_object *obj)
>  {
> -	return obj->active ? '*' : ' ';
> +	return i915_gem_object_is_active(obj) ? '*' : ' ';
>  }
> 
>  static char get_pin_flag(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 236ade61cade..e72b7f35a98e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2136,12 +2136,16 @@ struct drm_i915_gem_object {
> 
>  	struct list_head batch_pool_link;
> 
> +	unsigned long flags;
>  	/**
>  	 * This is set if the object is on the active lists (has pending
>  	 * rendering and so a non-zero seqno), and is not set if it is on
>  	 * inactive (ready to be unbound) list.
>  	 */
> -	unsigned int active:I915_NUM_ENGINES;
> +#define I915_BO_ACTIVE_SHIFT 0
> +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
> +#define __I915_BO_ACTIVE(bo) \
> +	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
> 
>  	/**
>  	 * This is set if the object has been written to since last bound
> @@ -2288,6 +2292,31 @@ i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
>  }
>  __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
> 
> +static inline unsigned long
> +i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> +{
> +	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
> +}
> +
> +static inline void
> +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
> +}
> +
> +static inline void
> +i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
> +static inline bool
> +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> +				  int engine)
> +{
> +	return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
>  /*
>   * Optimised SGL iterator for GEM objects
>   */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 05425ae7c8a8..a8279a598c4b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1126,7 +1126,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> 
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
> 
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
> 
> @@ -1165,7 +1165,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
> 
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
> 
> @@ -2109,10 +2109,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
>  	struct drm_i915_gem_object *obj =
>  		container_of(active, struct drm_i915_gem_object, last_read[ring]);
> 
> -	GEM_BUG_ON((obj->active & (1 << ring)) == 0);
> +	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, ring));
> 
> -	obj->active &= ~(1 << ring);
> -	if (obj->active)
> +	i915_gem_object_unset_active(obj, ring);
> +	if (i915_gem_object_is_active(obj))
>  		return;
> 
>  	/* Bump our place on the bound list to keep it roughly in LRU order
> @@ -2383,7 +2383,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		return -ENOENT;
>  	}
> 
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		goto out;
> 
>  	for (i = 0; i < I915_NUM_ENGINES; i++) {
> @@ -2472,7 +2472,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
> 
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
> 
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
> 
> @@ -3516,7 +3516,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	 * become non-busy without any further actions.
>  	 */
>  	args->busy = 0;
> -	if (obj->active) {
> +	if (i915_gem_object_is_active(obj)) {
>  		struct drm_i915_gem_request *req;
>  		int i;
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 69bf73b51df9..224265619f00 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -432,7 +432,7 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
> 
>  static bool object_is_idle(struct drm_i915_gem_object *obj)
>  {
> -	unsigned long active = obj->active;
> +	unsigned long active = i915_gem_object_is_active(obj);
>  	int idx;
> 
>  	for_each_active(active, idx) {
> @@ -991,7 +991,7 @@ static int
>  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
> -	const unsigned other_rings = ~intel_engine_flag(req->engine);
> +	const unsigned other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
>  	bool flush_chipset = false;
> @@ -1000,7 +1000,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
> 
> -		if (obj->active & other_rings) {
> +		if (obj->flags & other_rings) {
>  			ret = i915_gem_object_sync(obj, req);
>  			if (ret)
>  				return ret;
> @@ -1159,9 +1159,9 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>  	 * add the active reference first and queue for it to be dropped
>  	 * *last*.
>  	 */
> -	if (obj->active == 0)
> +	if (!i915_gem_object_is_active(obj))
>  		i915_gem_object_get(obj);
> -	obj->active |= 1 << idx;
> +	i915_gem_object_set_active(obj, idx);
>  	i915_gem_active_set(&obj->last_read[idx], req);
> 
>  	if (flags & EXEC_OBJECT_WRITE) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 71ad58836f48..5cbc4ee52c6d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -168,7 +168,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  		    !is_vmalloc_addr(obj->mapping))
>  			continue;
> 
> -		if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
> +		if ((flags & I915_SHRINK_ACTIVE) == 0 &&
> +		    i915_gem_object_is_active(obj))
>  			continue;
> 
>  		if (!can_release_pages(obj))
> @@ -253,7 +254,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  			count += obj->base.size >> PAGE_SHIFT;
> 
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> -		if (!obj->active && can_release_pages(obj))
> +		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
>  			count += obj->base.size >> PAGE_SHIFT;
>  	}
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index e57521dbddc6..221792632290 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -67,7 +67,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
>  	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
>  	int i, n;
> 
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		return;
> 
>  	n = 0;
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx