Our timeline lock is our defence against a concurrent execbuf
interrupting our request construction. We need to hold it throughout
or, for example, a second thread may interject a relocation request
between our own relocation request and execution in the ring.
A second, major benefit is that it allows us to preserve a large chunk
of the ringbuffer for our exclusive use, which should virtually
eliminate the threat of hitting a wait_for_space during request
construction -- although we should have already dropped other
contentious locks at that point.
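
As a minimal sketch of the intended flow (the helper name
eb_lock_and_submit() and the exact call sequence are illustrative,
not the precise code in this patch), request construction is
bracketed by the timeline mutex:

	/* Illustrative only: eb_lock_and_submit() is hypothetical. */
	static int eb_lock_and_submit(struct i915_execbuffer *eb,
				      struct intel_context *ce)
	{
		struct intel_timeline *tl;
		int err;

		tl = intel_context_timeline_lock(ce);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		/*
		 * While tl->mutex is held, no other execbuf can insert
		 * a request between our relocation request and our
		 * batch in the ring.
		 */
		err = eb_relocate(eb);
		if (!err)
			err = eb_submit(eb);

		intel_context_timeline_unlock(tl);
		return err;
	}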
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 413 +++++++++++-------
.../i915/gem/selftests/i915_gem_execbuffer.c | 24 +-
2 files changed, 281 insertions(+), 156 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 719ba9fe3e85..af3499aafd22 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -259,6 +259,8 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
+ struct intel_context *ce;
+
struct i915_vma *target;
struct i915_request *rq;
struct i915_vma *rq_vma;
@@ -639,6 +641,35 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
return 0;
}
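+/*
+ * Retire completed requests along the timeline, in order, stopping
+ * at the first request that has not yet completed.
+ */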
+static void retire_requests(struct intel_timeline *tl)
+{
+ struct i915_request *rq, *rn;
+
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (!i915_request_retire(rq))
+ break;
+}
+
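+/*
+ * Wait for the last request on the timeline, then retire everything
+ * behind it. Retiring may queue a barrier (a new last request), so
+ * loop until the timeline is idle.
+ */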
+static int wait_for_timeline(struct intel_timeline *tl)
+{
+ do {
+ struct dma_fence *fence;
+ int err;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
+ return 0;
+
+ err = dma_fence_wait(fence, true);
+ dma_fence_put(fence);
+ if (err)
+ return err;
+
+ /* Retiring may trigger a barrier, requiring an extra pass */
+ retire_requests(tl);
+ } while (1);
+}
+
static int eb_reserve(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
@@ -646,7 +677,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
struct list_head last;
struct eb_vma *ev;
unsigned int i, pass;
- int err = 0;
/*
* Attempt to pin all of the buffers into the GTT.
@@ -662,18 +692,37 @@ static int eb_reserve(struct i915_execbuffer *eb)
* room for the earlier objects *unless* we need to defragment.
*/
- if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
- return -EINTR;
-
pass = 0;
do {
+ int err = 0;
+
+ /*
+ * We need to hold one lock as we bind all the vma so that
+ * we have a consistent view of the entire vm and can plan
+ * evictions to fill the whole GTT. If we allow a second
+ * thread to run as we do this, it will either unbind
+ * everything we want pinned, or steal space that we need for
+ * ourselves. The closer we are to a full GTT, the more likely
+ * such contention will cause us to fail to bind the workload
+ * for this batch. Since we know at this point we need to
+ * find space for new buffers, we know that extra pressure
+ * from contention is likely.
+ *
+ * In lieu of being able to hold vm->mutex for the entire
+ * sequence (it's complicated!), we opt for struct_mutex.
+ */
+ if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
+ return -EINTR;
+