As we can now keep chaining together a relocation batch to process any
number of relocations, we can keep building that relocation batch for
all of the target vma. This avoids emitting a new request into the
ring for each target, which would consume precious ring space and
risk a potential stall.
v2: Propagate the failure from submitting the relocation batch.
Testcase: igt/gem_exec_reloc/basic-wide-active
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> #v1
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 31 ++++++++++++-------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 0874976b1cf7..4c4b9e0e75bc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -268,6 +268,7 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
+ struct i915_vma *target;
struct i915_request *rq;
u32 *rq_cmd;
unsigned int rq_size;
@@ -1051,14 +1052,14 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
}
-static void reloc_gpu_flush(struct reloc_cache *cache)
+static int reloc_gpu_flush(struct reloc_cache *cache)
{
struct i915_request *rq;
int err;
rq = fetch_and_zero(&cache->rq);
if (!rq)
- return;
+ return 0;
if (cache->rq_vma) {
struct drm_i915_gem_object *obj = cache->rq_vma->obj;
@@ -1084,15 +1085,14 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
intel_gt_chipset_flush(rq->engine->gt);
i915_request_add(rq);
+
+ return err;
}
static void reloc_cache_reset(struct reloc_cache *cache)
{
void *vaddr;
- if (cache->rq)
- reloc_gpu_flush(cache);
-
if (!cache->vaddr)
return;
@@ -1285,7 +1285,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
}
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct i915_vma *vma,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
@@ -1308,7 +1307,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto out_pool;
}
- batch = i915_vma_instance(pool->obj, vma->vm, NULL);
+ batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
@@ -1328,10 +1327,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- err = reloc_move_to_gpu(rq, vma);
- if (err)
- goto err_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -1376,11 +1371,19 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
if (!intel_engine_can_store_dword(eb->engine))
return ERR_PTR(-ENODEV);
- err = __reloc_gpu_alloc(eb, vma, len);
+ err = __reloc_gpu_alloc(eb, len);
if (unlikely(err))
return ERR_PTR(err);
}
+ if (vma != cache->target) {
+ err = reloc_move_to_gpu(cache->rq, vma);
+ if (unlikely(err))
+ return ERR_PTR(err);
+
+ cache->target = vma;
+ }
+
if (unlikely(cache->rq_size + len >
PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
err = reloc_gpu_chain(cache);
@@ -1698,6 +1701,10 @@ static int eb_relocate(struct i915_execbuffer *eb)
if (err)
return err;
}
+
+ err = reloc_gpu_flush(&eb->reloc_cache);
+ if (err)
+ return err;
}
return 0;