---
drivers/gpu/drm/i915/display/intel_display.c | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 +-
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 140 ++++++++++--------
.../i915/gem/selftests/i915_gem_execbuffer.c | 4 +-
drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 4 +-
drivers/gpu/drm/i915/gt/gen6_ppgtt.h | 4 +-
drivers/gpu/drm/i915/gt/intel_context.c | 65 +++++---
drivers/gpu/drm/i915/gt/intel_context.h | 13 ++
drivers/gpu/drm/i915/gt/intel_context_types.h | 3 +-
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +-
drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 5 +-
drivers/gpu/drm/i915/gt/intel_renderstate.c | 2 +-
drivers/gpu/drm/i915/gt/intel_ring.c | 10 +-
drivers/gpu/drm/i915/gt/intel_ring.h | 3 +-
.../gpu/drm/i915/gt/intel_ring_submission.c | 15 +-
drivers/gpu/drm/i915/gt/intel_timeline.c | 12 +-
drivers/gpu/drm/i915/gt/intel_timeline.h | 3 +-
drivers/gpu/drm/i915/gt/mock_engine.c | 3 +-
drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +-
drivers/gpu/drm/i915/gt/selftest_timeline.c | 4 +-
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 13 +-
drivers/gpu/drm/i915/i915_gem.c | 11 +-
drivers/gpu/drm/i915/i915_vma.c | 13 +-
drivers/gpu/drm/i915/i915_vma.h | 13 +-
26 files changed, 217 insertions(+), 137 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 5b4434289117..aa5a88340d10 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3451,7 +3451,7 @@ initial_plane_vma(struct drm_i915_private *i915,
if (IS_ERR(vma))
goto err_obj;
- if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+ if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
goto err_obj;
if (i915_gem_object_is_tiled(obj) &&
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 34c8b0dd85e0..cf5ecbde9e06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1154,7 +1154,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
i915_gem_ww_ctx_init(&ww, true);
retry:
- err = intel_context_pin(ce);
+ err = intel_context_pin_ww(ce, &ww);
if (err)
goto err;
@@ -1247,7 +1247,7 @@ static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww
if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
/* ppGTT is not part of the legacy context image */
- return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+ return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 604e26adea23..94bfdc54f035 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -437,16 +437,17 @@ eb_pin_vma(struct i915_execbuffer *eb,
pin_flags |= PIN_GLOBAL;
/* Attempt to reuse the current location if available */
- if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+ /* TODO: Add -EDEADLK handling here */
+ if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) {
if (entry->flags & EXEC_OBJECT_PINNED)
return false;
/* Failing that pick any _free_ space if suitable */
- if (unlikely(i915_vma_pin(vma,
- entry->pad_to_size,
- entry->alignment,
- eb_pin_flags(entry, ev->flags) |
- PIN_USER | PIN_NOEVICT)))
+ if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
+ entry->pad_to_size,
+ entry->alignment,
+ eb_pin_flags(entry, ev->flags) |
+ PIN_USER | PIN_NOEVICT)))
return false;
}
@@ -587,7 +588,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
obj->cache_level != I915_CACHE_NONE);
}
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
+static int eb_reserve_vma(struct i915_execbuffer *eb,
struct eb_vma *ev,
u64 pin_flags)
{
@@ -602,7 +603,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
return err;
}
- err = i915_vma_pin(vma,
+ err = i915_vma_pin_ww(vma, &eb->ww,
entry->pad_to_size, entry->alignment,
eb_pin_flags(entry, ev->flags) | pin_flags);
if (err)
@@ -1133,9 +1134,10 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
}
static void *reloc_iomap(struct drm_i915_gem_object *obj,
- struct reloc_cache *cache,
+ struct i915_execbuffer *eb,
unsigned long page)
{
+ struct reloc_cache *cache = &eb->reloc_cache;
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
void *vaddr;
@@ -1157,10 +1159,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (err)
return ERR_PTR(err);
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
+ vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (vma == ERR_PTR(-EDEADLK))
+ return vma;
+
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
mutex_lock(&ggtt->vm.mutex);
@@ -1196,9 +1201,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
}
static void *reloc_vaddr(struct drm_i915_gem_object *obj,
- struct reloc_cache *cache,
+ struct i915_execbuffer *eb,
unsigned long page)
{
+ struct reloc_cache *cache = &eb->reloc_cache;
void *vaddr;
if (cache->page == page) {
@@ -1206,7 +1212,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
- vaddr = reloc_iomap(obj, cache, page);
+ vaddr = reloc_iomap(obj, eb, page);
if (!vaddr)
vaddr = reloc_kmap(obj, cache, page);
}
@@ -1293,7 +1299,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto err_unmap;
}
- err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+ err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
if (err)
goto err_unmap;
@@ -1314,7 +1320,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
eb->reloc_context = ce;
}
- err = intel_context_pin(ce);
+ err = intel_context_pin_ww(ce, &eb->ww);
if (err)
goto err_unpin;
@@ -1537,8 +1543,7 @@ relocate_entry(struct i915_vma *vma,
void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj,
- &eb->reloc_cache,
+ vaddr = reloc_vaddr(vma->obj, eb,
offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -1954,6 +1959,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
rq = eb_pin_engine(eb, false);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
+ rq = NULL;
goto err;
}
@@ -2238,7 +2244,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
}
static struct i915_vma *
-shadow_batch_pin(struct drm_i915_gem_object *obj,
+shadow_batch_pin(struct i915_execbuffer *eb,
+ struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
unsigned int flags)
{
@@ -2249,7 +2256,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return vma;
- err = i915_vma_pin(vma, 0, 0, flags);
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
if (err)
return ERR_PTR(err);
@@ -2403,16 +2410,33 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
return err;
}
+static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+ /*
+ * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+ * batch" bit. Hence we need to pin secure batches into the global gtt.
+ * hsw should have this fixed, but bdw mucks it up again. */
+ if (eb->batch_flags & I915_DISPATCH_SECURE)
+ return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+
+ return NULL;
+}
+
static int eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
- struct i915_vma *shadow, *trampoline;
+ struct i915_vma *shadow, *trampoline, *batch;
unsigned int len;
int err;
- if (!eb_use_cmdparser(eb))
- return 0;
+ if (!eb_use_cmdparser(eb)) {
+ batch = eb_dispatch_secure(eb, eb->batch->vma);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ goto secure_batch;
+ }
len = eb->batch_len;
if (!CMDPARSER_USES_GGTT(eb->i915)) {
@@ -2440,7 +2464,7 @@ static int eb_parse(struct i915_execbuffer *eb)
if (err)
goto err;
- shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+ shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
if (IS_ERR(shadow)) {
err = PTR_ERR(shadow);
goto err;
@@ -2452,7 +2476,7 @@ static int eb_parse(struct i915_execbuffer *eb)
if (CMDPARSER_USES_GGTT(eb->i915)) {
trampoline = shadow;
- shadow = shadow_batch_pin(pool->obj,
+ shadow = shadow_batch_pin(eb, pool->obj,
&eb->engine->gt->ggtt->vm,
PIN_GLOBAL);
if (IS_ERR(shadow)) {
@@ -2465,19 +2489,34 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_flags |= I915_DISPATCH_SECURE;
}
+ batch = eb_dispatch_secure(eb, shadow);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto err_trampoline;
+ }
+
err = eb_parse_pipeline(eb, shadow, trampoline);
if (err)
- goto err_trampoline;
+ goto err_unpin_batch;
- eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
- eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
eb->batch = &eb->vma[eb->buffer_count++];
+ eb->batch->vma = i915_vma_get(shadow);
+ eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
eb->trampoline = trampoline;
eb->batch_start_offset = 0;
+secure_batch:
+ if (batch) {
+ eb->batch = &eb->vma[eb->buffer_count++];
+ eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
+ eb->batch->vma = i915_vma_get(batch);
+ }
return 0;
+err_unpin_batch:
+ if (batch)
+ i915_vma_unpin(batch);
err_trampoline:
if (trampoline)
i915_vma_unpin(trampoline);
@@ -2619,7 +2658,7 @@ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throt
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
- err = intel_context_pin(ce);
+ err = intel_context_pin_ww(ce, &eb->ww);
if (err)
return ERR_PTR(err);
@@ -3237,33 +3276,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
ww_acquire_done(&eb.ww.ctx);
- /*
- * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
- * batch" bit. Hence we need to pin secure batches into the global gtt.
- * hsw should have this fixed, but bdw mucks it up again. */
- if (eb.batch_flags & I915_DISPATCH_SECURE) {
- struct i915_vma *vma;
-
- /*
- * So on first glance it looks freaky that we pin the batch here
- * outside of the reservation loop. But:
- * - The batch is already pinned into the relevant ppgtt, so we
- * already have the backing storage fully allocated.
- * - No other BO uses the global gtt (well contexts, but meh),
- * so we don't really have issues with multiple objects not
- * fitting due to fragmentation.
- * So this is actually safe.
- */
- vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_vma;
- }
-
- batch = vma;
- } else {
- batch = eb.batch->vma;
- }
+ batch = eb.batch->vma;
/* All GPU relocation batches must be submitted prior to the user rq */
GEM_BUG_ON(eb.reloc_cache.rq);
@@ -3272,7 +3285,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.request = i915_request_create(eb.context);
if (IS_ERR(eb.request)) {
err = PTR_ERR(eb.request);
- goto err_batch_unpin;
+ goto err_vma;
}
if (in_fence) {
@@ -3333,9 +3346,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
i915_request_put(eb.request);
-err_batch_unpin:
- if (eb.batch_flags & I915_DISPATCH_SECURE)
- i915_vma_unpin(batch);
err_vma:
eb_release_vmas(&eb, true);
if (eb.trampoline)
@@ -3423,7 +3433,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
__GFP_NOWARN | GFP_KERNEL);
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+
+ /* Allocate extra slots for use by the command parser */
+ exec2_list = kvmalloc_array(count + 2, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec_list == NULL || exec2_list == NULL) {
drm_dbg(&i915->drm,
@@ -3500,8 +3512,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
if (err)
return err;
- /* Allocate an extra slot for use by the command parser */
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ /* Allocate extra slots for use by the command parser */
+ exec2_list = kvmalloc_array(count + 2, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec2_list == NULL) {
drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 563839cbaf1c..e1d50a5a1477 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -36,7 +36,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (err)
return err;
- err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH);
if (err)
return err;
@@ -139,7 +139,7 @@ static int igt_gpu_reloc(void *arg)
i915_gem_ww_ctx_init(&eb.ww, false);
retry:
- err = intel_context_pin(eb.context);
+ err = intel_context_pin_ww(eb.context, &eb.ww);
if (!err) {
err = __igt_gpu_reloc(&eb, scratch);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 7e5a86b774a7..fd0d24d28763 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -368,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
return vma;
}
-int gen6_ppgtt_pin(struct i915_ppgtt *base)
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
int err;
@@ -394,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
*/
err = 0;
if (!atomic_read(&ppgtt->pin_count))
- err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+ err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
if (!err)
atomic_inc(&ppgtt->pin_count);
mutex_unlock(&ppgtt->pin_mutex);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
index 7249672e5802..3357228f3304 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -8,6 +8,8 @@
#include "intel_gtt.h"
+struct i915_gem_ww_ctx;
+
struct gen6_ppgtt {
struct i915_ppgtt base;
@@ -67,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
(pt = i915_pt_entry(pd, iter), true); \
++iter)
-int gen6_ppgtt_pin(struct i915_ppgtt *base);
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
void gen6_ppgtt_unpin(struct i915_ppgtt *base);
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
void gen6_ppgtt_enable(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index efe9a7a89ede..c05ef213bdc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,12 +93,12 @@ static void intel_context_active_release(struct intel_context *ce)
i915_active_release(&ce->active);
}
-static int __context_pin_state(struct i915_vma *vma)
+static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
int err;
- err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
+ err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
if (err)
return err;
@@ -127,11 +127,12 @@ static void __context_unpin_state(struct i915_vma *vma)
__i915_vma_unpin(vma);
}
-static int __ring_active(struct intel_ring *ring)
+static int __ring_active(struct intel_ring *ring,
+ struct i915_gem_ww_ctx *ww)
{
int err;
- err = intel_ring_pin(ring);
+ err = intel_ring_pin(ring, ww);
if (err)
return err;
@@ -152,24 +153,25 @@ static void __ring_retire(struct intel_ring *ring)
intel_ring_unpin(ring);
}
-static int intel_context_pre_pin(struct intel_context *ce)
+static int intel_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
{
int err;
CE_TRACE(ce, "active\n");
- err = __ring_active(ce->ring);
+ err = __ring_active(ce->ring, ww);
if (err)
return err;
- err = intel_timeline_pin(ce->timeline);
+ err = intel_timeline_pin(ce->timeline, ww);
if (err)
goto err_ring;
if (!ce->state)
return 0;
- err = __context_pin_state(ce->state);
+ err = __context_pin_state(ce->state, ww);
if (err)
goto err_timeline;
@@ -192,7 +194,8 @@ static void intel_context_post_unpin(struct intel_context *ce)
__ring_retire(ce->ring);
}
-int __intel_context_do_pin(struct intel_context *ce)
+int __intel_context_do_pin_ww(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
{
bool handoff = false;
void *vaddr;
@@ -209,7 +212,14 @@ int __intel_context_do_pin(struct intel_context *ce)
* refcount for __intel_context_active(), which prevent a lock
* inversion of ce->pin_mutex vs dma_resv_lock().
*/
- err = intel_context_pre_pin(ce);
+
+ err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);