If we restrict ourselves to only using a cacheline for each timeline's HWSP (we could go smaller, but want to avoid needlessly polluting cachelines on different engines between different contexts), then we can suballocate a single 4k page into 64 different timeline HWSP. By treating each fresh allocation as a slab of 64 entries, we can keep it around for the next 64 allocation attempts until we need to refresh the slab cache. John Harrison noted the issue of fragmentation leading to the same worst-case performance of one page per timeline as before, which can be mitigated by adopting a freelist. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: John Harrison <John.C.Harrison@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 5 ++ drivers/gpu/drm/i915/i915_timeline.c | 80 ++++++++++++++++++++++++---- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3913900600b7..d59228dabb6e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1978,6 +1978,11 @@ struct drm_i915_private { struct i915_gt_timelines { struct mutex mutex; /* protects list, tainted by GPU */ struct list_head list; + + /* Pack multiple timelines' seqnos into the same page */ + spinlock_t hwsp_lock; + struct i915_vma *hwsp; + u64 bitmap; } timelines; struct list_head active_rings; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 380f4d25fb89..e939a9e1a4ab 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -11,26 +11,73 @@ static int hwsp_alloc(struct i915_timeline *timeline) { +#define NBITS BITS_PER_TYPE(typeof(gt->bitmap)) struct drm_i915_private *i915 = timeline->i915; - struct drm_i915_gem_object *bo; + struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_vma *vma; + int offset; + + spin_lock(&gt->hwsp_lock); + +restart: + offset = find_first_bit((unsigned long *)&gt->bitmap, NBITS);
+ if (offset == NBITS && gt->hwsp) { + i915_vma_put(gt->hwsp); + gt->hwsp = NULL; + } + + vma = gt->hwsp; + if (!vma) { + struct drm_i915_gem_object *bo; + + spin_unlock(&gt->hwsp_lock); - bo = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(bo)) - return PTR_ERR(bo); + BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE); + bo = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(bo)) + return PTR_ERR(bo); - i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); + i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); - vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(bo); - return PTR_ERR(vma); + vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(bo); + return PTR_ERR(vma); + } + + spin_lock(&gt->hwsp_lock); + if (gt->hwsp) { + i915_gem_object_put(bo); + goto restart; + } + + gt->hwsp = vma; + gt->bitmap = ~0ull; + offset = 0; } - timeline->hwsp_ggtt = vma; - timeline->hwsp_offset = 0; + gt->bitmap &= ~BIT_ULL(offset); + + spin_unlock(&gt->hwsp_lock); + + timeline->hwsp_ggtt = i915_vma_get(vma); + timeline->hwsp_offset = offset * CACHELINE_BYTES; return 0; +#undef NBITS +} + +static void hwsp_free(struct i915_timeline *timeline) +{ + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + + if (timeline->hwsp_ggtt != gt->hwsp) + return; + + spin_lock(&gt->hwsp_lock); + if (timeline->hwsp_ggtt == gt->hwsp) + gt->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + spin_unlock(&gt->hwsp_lock); } int i915_timeline_init(struct drm_i915_private *i915, @@ -65,6 +112,7 @@ int i915_timeline_init(struct drm_i915_private *i915, vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { + hwsp_free(timeline); i915_vma_put(timeline->hwsp_ggtt); return PTR_ERR(vaddr); } @@ -99,6 +147,8 @@ void i915_timelines_init(struct drm_i915_private *i915) mutex_init(&gt->mutex); INIT_LIST_HEAD(&gt->list); + spin_lock_init(&gt->hwsp_lock); + /* via
i915_gem_wait_for_idle() */ i915_gem_shrinker_taints_mutex(i915, &gt->mutex); } @@ -144,6 +194,9 @@ void i915_timeline_fini(struct i915_timeline *timeline) list_del(&timeline->link); mutex_unlock(&gt->mutex); + i915_syncmap_free(&timeline->sync); + hwsp_free(timeline); + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); i915_vma_put(timeline->hwsp_ggtt); } @@ -211,9 +264,14 @@ void __i915_timeline_free(struct kref *kref) void i915_timelines_fini(struct drm_i915_private *i915) { struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_vma *vma; GEM_BUG_ON(!list_empty(&gt->list)); + vma = fetch_and_zero(&i915->gt.timelines.hwsp); + if (vma) + i915_vma_put(vma); + mutex_destroy(&gt->mutex); } -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx