On 17/01/2019 14:34, Chris Wilson wrote:
Keep track of partially allocated pages for use in allocating future
timeline HWSP. This is still without migration, so it is possible for
the system to end up with each timeline in its own page, but we ensure
that no new allocation would needless allocate a fresh page!
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: John Harrison <John.C.Harrison@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.h | 3 +-
drivers/gpu/drm/i915/i915_timeline.c | 81 +++++++++++++++++-----------
2 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d59228dabb6e..0bebef428f1e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1981,8 +1981,7 @@ struct drm_i915_private {
/* Pack multiple timelines' seqnos into the same page */
spinlock_t hwsp_lock;
- struct i915_vma *hwsp;
- u64 bitmap;
+ struct list_head hwsp;
hwsp_list to use our established convention? Actually, hwsp_free_list
would be even more self-documenting.
} timelines;
struct list_head active_rings;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index e939a9e1a4ab..64bb1ce24318 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -9,74 +9,94 @@
#include "i915_timeline.h"
#include "i915_syncmap.h"
+struct i915_timeline_hwsp {
+ struct list_head link;
+ struct i915_vma *vma;
+ u64 bitmap;
+};
+
static int hwsp_alloc(struct i915_timeline *timeline)
{
-#define NBITS BITS_PER_TYPE(typeof(gt->bitmap))
struct drm_i915_private *i915 = timeline->i915;
struct i915_gt_timelines *gt = &i915->gt.timelines;
- struct i915_vma *vma;
+ struct i915_timeline_hwsp *hwsp;
int offset;
spin_lock(>->hwsp_lock);
-restart:
- offset = find_first_bit((unsigned long *)>->bitmap, NBITS);
- if (offset == NBITS && gt->hwsp) {
- i915_vma_put(gt->hwsp);
- gt->hwsp = NULL;
- }
-
- vma = gt->hwsp;
- if (!vma) {
+ hwsp = list_first_entry_or_null(>->hwsp, typeof(*hwsp), link);
+ if (!hwsp) {
struct drm_i915_gem_object *bo;
+ struct i915_vma *vma;
spin_unlock(>->hwsp_lock);
- BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
+ hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
+ if (!hwsp)
+ return -ENOMEM;
+
+ BUILD_BUG_ON(BITS_PER_TYPE(hwsp->bitmap) * CACHELINE_BYTES > PAGE_SIZE);
bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(bo))
+ if (IS_ERR(bo)) {
+ kfree(hwsp);
return PTR_ERR(bo);
+ }
i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(bo);
+ kfree(hwsp);
return PTR_ERR(vma);
}
- spin_lock(>->hwsp_lock);
- if (gt->hwsp) {
- i915_gem_object_put(bo);
- goto restart;
- }
+ vma->private = hwsp;
+ hwsp->vma = vma;
+ hwsp->bitmap = ~0ull;
- gt->hwsp = vma;
- gt->bitmap = ~0ull;
- offset = 0;
+ spin_lock(>->hwsp_lock);
+ list_add(&hwsp->link, >->hwsp);
}
- gt->bitmap &= ~BIT_ULL(offset);
+ GEM_BUG_ON(!hwsp->bitmap);
+ offset = __ffs64(hwsp->bitmap);
I never can remember from which side is first. With find_first_bit I
didn't have this problem. :)
+ hwsp->bitmap &= ~BIT_ULL(offset);
+ if (!hwsp->bitmap)
+ list_del(&hwsp->link);
spin_unlock(>->hwsp_lock);
- timeline->hwsp_ggtt = i915_vma_get(vma);
+ timeline->hwsp_ggtt = i915_vma_get(hwsp->vma);
timeline->hwsp_offset = offset * CACHELINE_BYTES;
+ GEM_BUG_ON(timeline->hwsp_ggtt->private != hwsp);
+
return 0;
-#undef NBITS
}
static void hwsp_free(struct i915_timeline *timeline)
{
struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
+ struct i915_timeline_hwsp *hwsp;
- if (timeline->hwsp_ggtt != gt->hwsp)
+ hwsp = timeline->hwsp_ggtt->private;
+ if (!hwsp)
return;
Hm is there a path to this return now?
spin_lock(>->hwsp_lock);
- if (timeline->hwsp_ggtt == gt->hwsp)
- gt->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+
+ if (!hwsp->bitmap)
+ list_add_tail(&hwsp->link, >->hwsp);
I got so deep into the code that I forgot to remind to add some more
comments. :) This seems like a good place - oh well.. who would come up
with good comments anyway.
+
+ hwsp->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+
+ if (hwsp->bitmap == ~0ull) {
+ i915_vma_put(hwsp->vma);
+ list_del(&hwsp->link);
+ kfree(hwsp);
+ }
+
spin_unlock(>->hwsp_lock);
}
@@ -148,6 +168,7 @@ void i915_timelines_init(struct drm_i915_private *i915)
INIT_LIST_HEAD(>->list);
spin_lock_init(>->hwsp_lock);
+ INIT_LIST_HEAD(>->hwsp);
/* via i915_gem_wait_for_idle() */
i915_gem_shrinker_taints_mutex(i915, >->mutex);
@@ -264,13 +285,9 @@ void __i915_timeline_free(struct kref *kref)
void i915_timelines_fini(struct drm_i915_private *i915)
{
struct i915_gt_timelines *gt = &i915->gt.timelines;
- struct i915_vma *vma;
GEM_BUG_ON(!list_empty(>->list));
-
- vma = fetch_and_zero(&i915->gt.timelines.hwsp);
- if (vma)
- i915_vma_put(vma);
+ GEM_BUG_ON(!list_empty(>->hwsp));
mutex_destroy(>->mutex);
}
The only thing missing is a selftest which will exercise some "chunky"
timeline allocations and frees. Just to exercise this logic a bit..
something like maybe (in very crude pseudo-code):
blocks = { 1, 64 / 2, 64 - 1, 64, 64 + 1, 64 * 3 / 2 };
for_each_block {
for 0..block
tl_emit_rq(block);
sync
if (flags & LINEAR)
for 0..block
tl_free
else if (flags & RANDOM_FREE)
...
}
It's a very terse and sloppy sketch but I think you'll know what I am
trying to say.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx