We are motivated to avoid using a bitfield for obj->active for a couple of reasons. Firstly, we wish to document our lockless read of obj->active using READ_ONCE inside i915_gem_busy_ioctl() and that requires an integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code when presented with a bitfield and that shows up high on the profiles of request tracking (mainly due to excess memory traffic as it converts the bitfield to a register and back and generates frequent AGI in the process). Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 31 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem.c | 16 +++++++-------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++----- drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +++-- drivers/gpu/drm/i915/i915_gem_userptr.c | 2 +- 6 files changed, 48 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 355bbf895c22..9154919fdd56 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data) static char get_active_flag(struct drm_i915_gem_object *obj) { - return obj->active ? '*' : ' '; + return i915_gem_object_is_active(obj) ? '*' : ' '; } static char get_pin_flag(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 236ade61cade..e72b7f35a98e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2136,12 +2136,16 @@ struct drm_i915_gem_object { struct list_head batch_pool_link; + unsigned long flags; /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on * inactive (ready to be unbound) list. */ - unsigned int active:I915_NUM_ENGINES; +#define I915_BO_ACTIVE_SHIFT 0 +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1) +#define __I915_BO_ACTIVE(bo) \ + ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** * This is set if the object has been written to since last bound @@ -2288,6 +2292,31 @@ i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj) } __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); +static inline unsigned long +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +{ + return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK; +} + +static inline void +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT); +} + +static inline void +i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT)); +} + +static inline bool +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, + int engine) +{ + return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT)); +} + /* * Optimised SGL iterator for GEM objects */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 05425ae7c8a8..a8279a598c4b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1126,7 +1126,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - active_mask = obj->active; + active_mask = i915_gem_object_is_active(obj); if (!active_mask) return 0; @@ -1165,7 +1165,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, BUG_ON(!mutex_is_locked(&dev->struct_mutex)); BUG_ON(!dev_priv->mm.interruptible); - active_mask = obj->active; + active_mask = i915_gem_object_is_active(obj); if (!active_mask) return 0; @@ -2109,10 +2109,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, struct drm_i915_gem_object, last_read[ring]); - GEM_BUG_ON((obj->active & (1 << ring)) == 0); + GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, ring)); - obj->active &= ~(1 << ring); - if (obj->active) + i915_gem_object_unset_active(obj, ring); + if (i915_gem_object_is_active(obj)) return; /* Bump our place on the bound list to keep it roughly in LRU order @@ -2383,7 +2383,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -ENOENT; } - if (!obj->active) + if (!i915_gem_object_is_active(obj)) goto out; for (i = 0; i < I915_NUM_ENGINES; i++) { @@ -2472,7 +2472,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - active_mask = obj->active; + active_mask = i915_gem_object_is_active(obj); if (!active_mask) return 0; @@ -3516,7 +3516,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * become non-busy without any further actions. */ args->busy = 0; - if (obj->active) { + if (i915_gem_object_is_active(obj)) { struct drm_i915_gem_request *req; int i; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 69bf73b51df9..224265619f00 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -432,7 +432,7 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj, static bool object_is_idle(struct drm_i915_gem_object *obj) { - unsigned long active = obj->active; + unsigned long active = i915_gem_object_is_active(obj); int idx; for_each_active(active, idx) { @@ -991,7 +991,7 @@ static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned other_rings = ~intel_engine_flag(req->engine); + const unsigned other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT; struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; @@ -1000,7 +1000,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - if (obj->active & other_rings) { + if (obj->flags & other_rings) { ret = i915_gem_object_sync(obj, req); if (ret) return ret; @@ -1159,9 +1159,9 @@ void i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (obj->active == 0) + if (!i915_gem_object_is_active(obj)) i915_gem_object_get(obj); - obj->active |= 1 << idx; + i915_gem_object_set_active(obj, idx); i915_gem_active_set(&obj->last_read[idx], req); if (flags & EXEC_OBJECT_WRITE) { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 71ad58836f48..5cbc4ee52c6d 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -168,7 +168,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, !is_vmalloc_addr(obj->mapping)) continue; - if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active) + if ((flags & I915_SHRINK_ACTIVE) == 0 && + i915_gem_object_is_active(obj)) continue; if (!can_release_pages(obj)) @@ -253,7 +254,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (!obj->active && can_release_pages(obj)) + if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e57521dbddc6..221792632290 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -67,7 +67,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; int i, n; - if (!obj->active) + if (!i915_gem_object_is_active(obj)) return; n = 0; -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx