[PATCH 30/34] drm/i915: Keep timeline HWSP allocated until the system is idle

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In preparation for enabling HW semaphores, we need to keep the
in-flight timelines' HWSP alive until the entire system is idle, as any
other timeline active on the GPU may still refer back to the already
retired timeline. We therefore have to delay both the recycling of
available cachelines and the unpinning of old HWSP until the next idle
point (i.e. on parking).

That we have to keep the HWSP alive for external references on HW raises
an interesting conundrum. On a busy system, we may never see a global
idle point, essentially meaning the resource will be leaking until we
are forced to sleep. What we need is a set of RCU primitives for the GPU!
This should also help mitigate the resource starvation issues
stemming from keeping all logical state pinned until idle (instead
of, as currently handled, only until the next context switch).

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h      |   2 +
 drivers/gpu/drm/i915/i915_request.c  |  34 ++++---
 drivers/gpu/drm/i915/i915_timeline.c | 127 ++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_timeline.h |   1 +
 4 files changed, 133 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5577e0e1034f..7ca701cf9086 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1981,7 +1981,9 @@ struct drm_i915_private {
 
 			/* Pack multiple timelines' seqnos into the same page */
 			spinlock_t hwsp_lock;
+			struct list_head hwsp_pin_list;
 			struct list_head hwsp_free_list;
+			struct list_head hwsp_dead_list;
 		} timelines;
 
 		struct list_head active_rings;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index cca437ac8a7e..099c6f994b99 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -331,12 +331,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
-static u32 timeline_get_seqno(struct i915_timeline *tl)
-{
-	tl->seqno += tl->has_initial_breadcrumb;
-	return ++tl->seqno;
-}
-
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -538,8 +532,10 @@ struct i915_request *
 i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_request *rq;
 	struct intel_context *ce;
+	struct i915_timeline *tl;
+	struct i915_request *rq;
+	u32 seqno;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -614,7 +610,15 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		}
 	}
 
-	rq->rcustate = get_state_synchronize_rcu();
+	tl = ce->ring->timeline;
+	GEM_BUG_ON(tl == &engine->timeline);
+	ret = i915_timeline_get_seqno(tl, &seqno);
+	if (ret)
+		goto err_free;
+
+	spin_lock_init(&rq->lock);
+	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+		       tl->fence_context, seqno);
 
 	INIT_LIST_HEAD(&rq->active_list);
 	rq->i915 = i915;
@@ -622,16 +626,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->gem_context = ctx;
 	rq->hw_context = ce;
 	rq->ring = ce->ring;
-	rq->timeline = ce->ring->timeline;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
-	rq->hwsp_seqno = rq->timeline->hwsp_seqno;
-
-	spin_lock_init(&rq->lock);
-	dma_fence_init(&rq->fence,
-		       &i915_fence_ops,
-		       &rq->lock,
-		       rq->timeline->fence_context,
-		       timeline_get_seqno(rq->timeline));
+	rq->timeline = tl;
+	rq->hwsp_seqno = tl->hwsp_seqno;
+	rq->rcustate = get_state_synchronize_rcu();
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -688,6 +685,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
+err_free:
 	kmem_cache_free(i915->requests, rq);
 err_unreserve:
 	unreserve_gt(i915);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 7bc9164733bc..a0bbc993048b 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -11,8 +11,11 @@
 
 struct i915_timeline_hwsp {
 	struct i915_vma *vma;
+	struct list_head pin_link;
 	struct list_head free_link;
+	struct list_head dead_link;
 	u64 free_bitmap;
+	u64 dead_bitmap;
 };
 
 static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
@@ -33,8 +36,7 @@ static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
 	return vma;
 }
 
-static struct i915_vma *
-hwsp_alloc(struct i915_timeline *timeline, int *offset)
+static struct i915_vma *hwsp_alloc(struct i915_timeline *timeline, int *offset)
 {
 	struct drm_i915_private *i915 = timeline->i915;
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
@@ -66,6 +68,7 @@ hwsp_alloc(struct i915_timeline *timeline, int *offset)
 		vma->private = hwsp;
 		hwsp->vma = vma;
 		hwsp->free_bitmap = ~0ull;
+		hwsp->dead_bitmap = 0;
 
 		spin_lock(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
@@ -96,18 +99,11 @@ static void hwsp_free(struct i915_timeline *timeline)
 
 	spin_lock(&gt->hwsp_lock);
 
-	/* As a cacheline becomes available, publish the HWSP on the freelist */
-	if (!hwsp->free_bitmap)
-		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
-
-	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	/* Defer recycling the HWSP cacheline until after the GPU is idle. */
+	if (!hwsp->dead_bitmap)
+		list_add_tail(&hwsp->dead_link, &gt->hwsp_dead_list);
 
-	/* And if no one is left using it, give the page back to the system */
-	if (hwsp->free_bitmap == ~0ull) {
-		i915_vma_put(hwsp->vma);
-		list_del(&hwsp->free_link);
-		kfree(hwsp);
-	}
+	hwsp->dead_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
 
 	spin_unlock(&gt->hwsp_lock);
 }
@@ -172,7 +168,9 @@ void i915_timelines_init(struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->active_list);
 
 	spin_lock_init(&gt->hwsp_lock);
+	INIT_LIST_HEAD(&gt->hwsp_pin_list);
 	INIT_LIST_HEAD(&gt->hwsp_free_list);
+	INIT_LIST_HEAD(&gt->hwsp_dead_list);
 
 	/* via i915_gem_wait_for_idle() */
 	i915_gem_shrinker_taints_mutex(i915, &gt->mutex);
@@ -209,6 +207,7 @@ static void timeline_inactive(struct i915_timeline *tl)
 void i915_timelines_park(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
+	struct i915_timeline_hwsp *hwsp, *hn;
 	struct i915_timeline *timeline;
 
 	mutex_lock(&gt->mutex);
@@ -222,6 +221,38 @@ void i915_timelines_park(struct drm_i915_private *i915)
 		i915_syncmap_free(&timeline->sync);
 	}
 	mutex_unlock(&gt->mutex);
+
+	/*
+	 * Now the system is idle, we can be sure that there are no more
+	 * references to our old HWSP pages remaining on the HW, so we
+	 * can return the pages back to the system.
+	 */
+	spin_lock(&gt->hwsp_lock);
+
+	list_for_each_entry_safe(hwsp, hn, &gt->hwsp_pin_list, pin_link) {
+		INIT_LIST_HEAD(&hwsp->pin_link);
+		i915_vma_unpin(hwsp->vma);
+	}
+	INIT_LIST_HEAD(&gt->hwsp_pin_list);
+
+	list_for_each_entry_safe(hwsp, hn, &gt->hwsp_dead_list, dead_link) {
+		GEM_BUG_ON(!hwsp->dead_bitmap);
+
+		if (!hwsp->free_bitmap)
+			list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
+
+		hwsp->free_bitmap |= hwsp->dead_bitmap;
+		hwsp->dead_bitmap = 0;
+
+		if (hwsp->free_bitmap == ~0ull) {
+			list_del(&hwsp->free_link);
+			i915_vma_put(hwsp->vma);
+			kfree(hwsp);
+		}
+	}
+	INIT_LIST_HEAD(&gt->hwsp_dead_list);
+
+	spin_unlock(&gt->hwsp_lock);
 }
 
 void i915_timeline_fini(struct i915_timeline *timeline)
@@ -259,6 +290,24 @@ i915_timeline_create(struct drm_i915_private *i915,
 	return timeline;
 }
 
+static void
+__i915_timeline_pin_hwsp(struct i915_timeline *tl,
+			 struct i915_timeline_hwsp *hwsp)
+{
+	GEM_BUG_ON(!tl->pin_count);
+
+	if (hwsp && list_empty(&hwsp->pin_link)) {
+		struct i915_gt_timelines *gt = &tl->i915->gt.timelines;
+
+		spin_lock(&gt->hwsp_lock);
+		if (list_empty(&hwsp->pin_link)) {
+			list_add(&hwsp->pin_link, &gt->hwsp_pin_list);
+			__i915_vma_pin(hwsp->vma);
+		}
+		spin_unlock(&gt->hwsp_lock);
+	}
+}
+
 int i915_timeline_pin(struct i915_timeline *tl)
 {
 	int err;
@@ -271,6 +320,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	if (err)
 		goto unpin;
 
+	__i915_timeline_pin_hwsp(tl, tl->hwsp_ggtt->private);
 	timeline_active(tl);
 
 	return 0;
@@ -280,6 +330,53 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	return err;
 }
 
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+	tl->seqno += tl->has_initial_breadcrumb;
+	return ++tl->seqno;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+	tl->seqno--;
+	tl->seqno -= tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	struct i915_vma *vma;
+	int offset;
+
+	vma = hwsp_alloc(tl, &offset);
+	if (IS_ERR(vma)) {
+		timeline_rollback(tl);
+		return PTR_ERR(vma);
+	}
+	hwsp_free(tl);
+
+	tl->hwsp_ggtt = i915_vma_get(vma);
+	tl->hwsp_offset = offset;
+	__i915_timeline_pin_hwsp(tl, vma->private);
+
+	*seqno = timeline_advance(tl);
+	return 0;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	*seqno = timeline_advance(tl);
+
+	/* Replace the HWSP on wraparound for HW semaphores */
+	if (unlikely(!*seqno && !i915_timeline_is_global(tl)))
+		return __i915_timeline_get_seqno(tl, seqno);
+
+	return 0;
+}
+
 void i915_timeline_unpin(struct i915_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -311,8 +408,12 @@ void i915_timelines_fini(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
 
+	i915_timelines_park(i915);
+
 	GEM_BUG_ON(!list_empty(&gt->active_list));
+	GEM_BUG_ON(!list_empty(&gt->hwsp_pin_list));
 	GEM_BUG_ON(!list_empty(&gt->hwsp_free_list));
+	GEM_BUG_ON(!list_empty(&gt->hwsp_dead_list));
 
 	mutex_destroy(&gt->mutex);
 }
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 8caeb66d1cd5..c01b81a85a15 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -149,6 +149,7 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 }
 
 int i915_timeline_pin(struct i915_timeline *tl);
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno);
 void i915_timeline_unpin(struct i915_timeline *tl);
 
 void i915_timelines_init(struct drm_i915_private *i915);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux