We can simplify our tracking of pending writes in an execbuf to the single bit in the vma->exec_entry->flags, but that requires the relocation function knowing the object's vma. Pass it along. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 12 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 104 ++++++++++++----------------- drivers/gpu/drm/i915/intel_display.c | 2 +- 4 files changed, 52 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 29e1d2ed8b05..2ceefce0e731 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2847,7 +2847,8 @@ static inline void i915_gem_object_unpin_vmap(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *to); + struct drm_i915_gem_request *to, + bool write); void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned flags); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 91f764e9dff2..3eeca1fb89d2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2597,9 +2597,9 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *to) + struct drm_i915_gem_request *to, + bool write) { - const bool readonly = obj->base.pending_write_domain == 0; struct drm_i915_gem_request *req[I915_NUM_RINGS]; int ret, i, n; @@ -2607,13 +2607,13 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, return 0; n = 0; - if (readonly) { - if (obj->last_write.request) - req[n++] = obj->last_write.request; - } else { + if (write) { for (i = 0; i < I915_NUM_RINGS; i++) if (obj->last_read[i].request) req[n++] = obj->last_read[i].request; + } else { + if (obj->last_write.request) + req[n++] = obj->last_write.request; } for (i = 0; i < n; i++) { ret = __i915_gem_object_sync(obj, to, req[i]); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 891c4593b8eb..2868e094f67c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -543,42 +543,25 @@ repeat: } static int -eb_relocate_entry(struct drm_i915_gem_object *obj, +eb_relocate_entry(struct i915_vma *vma, struct i915_execbuffer *eb, struct drm_i915_gem_relocation_entry *reloc) { - struct drm_gem_object *target_obj; - struct drm_i915_gem_object *target_i915_obj; - struct i915_vma *target_vma; - uint64_t target_offset; + struct i915_vma *target; + u64 target_offset; int ret; /* we've already hold a reference to all valid objects */ - target_vma = eb_get_vma(eb, reloc->target_handle); - if (unlikely(target_vma == NULL)) + target = eb_get_vma(eb, reloc->target_handle); + if (unlikely(target == NULL)) return -ENOENT; - target_i915_obj = target_vma->obj; - target_obj = &target_vma->obj->base; - - target_offset = gen8_canonical_addr(target_vma->node.start); - - /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and - * pipe_control writes because the gpu doesn't properly redirect them - * through the ppgtt for non_secure batchbuffers. */ - if (unlikely(IS_GEN6(eb->i915) && - reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { - ret = i915_vma_bind(target_vma, target_i915_obj->cache_level, - PIN_GLOBAL); - if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) - return ret; - } /* Validate that the target is in a valid r/w GPU domain */ if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { DRM_DEBUG("reloc with multiple write domains: " - "obj %p target %d offset %d " + "target %d offset %d " "read %08x write %08x", - obj, reloc->target_handle, + reloc->target_handle, (int) reloc->offset, reloc->read_domains, reloc->write_domain); @@ -587,47 +570,59 @@ eb_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely((reloc->write_domain | reloc->read_domains) & ~I915_GEM_GPU_DOMAINS)) { DRM_DEBUG("reloc with read/write non-GPU domains: " - "obj %p target %d offset %d " + "target %d offset %d " "read %08x write %08x", - obj, reloc->target_handle, + reloc->target_handle, (int) reloc->offset, reloc->read_domains, reloc->write_domain); return -EINVAL; } - target_obj->pending_read_domains |= reloc->read_domains; - target_obj->pending_write_domain |= reloc->write_domain; + if (reloc->write_domain) + target->exec_entry->flags |= EXEC_OBJECT_WRITE; + + /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and + * pipe_control writes because the gpu doesn't properly redirect them + * through the ppgtt for non_secure batchbuffers. */ + if (unlikely(IS_GEN6(eb->i915) && + reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { + ret = i915_vma_bind(target, target->obj->cache_level, + PIN_GLOBAL); + if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) + return ret; + } /* If the relocation already has the right value in it, no * more work needs to be done. */ + target_offset = gen8_canonical_addr(target->node.start); if (target_offset == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { + vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " - "obj %p target %d offset %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->offset, - (int) obj->base.size); + "target %d offset %d size %d.\n", + reloc->target_handle, + (int)reloc->offset, + (int)vma->size); return -EINVAL; } if (unlikely(reloc->offset & 3)) { DRM_DEBUG("Relocation not 4-byte aligned: " - "obj %p target %d offset %d.\n", - obj, reloc->target_handle, - (int) reloc->offset); + "target %d offset %d.\n", + reloc->target_handle, + (int)reloc->offset); return -EINVAL; } /* We can't wait for rendering with pagefaults disabled */ - if (i915_gem_object_is_active(obj) && pagefault_disabled()) + if (i915_gem_object_is_active(vma->obj) && pagefault_disabled()) return -EFAULT; - ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset); + ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset); if (ret) return ret; @@ -662,7 +657,7 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) do { u64 offset = r->presumed_offset; - ret = eb_relocate_entry(vma->obj, eb, r); + ret = eb_relocate_entry(vma, eb, r); if (ret) goto out; @@ -694,7 +689,7 @@ eb_relocate_vma_slow(struct i915_vma *vma, int i, ret = 0; for (i = 0; i < entry->relocation_count; i++) { - ret = eb_relocate_entry(vma->obj, eb, &relocs[i]); + ret = eb_relocate_entry(vma, eb, &relocs[i]); if (ret) break; } @@ -736,7 +731,6 @@ eb_reserve_vma(struct i915_vma *vma, struct intel_engine_cs *ring, bool *need_reloc) { - struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; uint64_t flags; int ret; @@ -790,11 +784,6 @@ eb_reserve_vma(struct i915_vma *vma, *need_reloc = true; } - if (entry->flags & EXEC_OBJECT_WRITE) { - obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER; - obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; - } - return 0; } @@ -855,7 +844,6 @@ eb_vma_misplaced(struct i915_vma *vma) static int eb_reserve(struct i915_execbuffer *eb) { const bool has_fenced_gpu_access = INTEL_INFO(eb->i915)->gen < 4; - struct drm_i915_gem_object *obj; struct i915_vma *vma; struct list_head ordered_vmas; struct list_head pinned_vmas; @@ -868,7 +856,6 @@ static int eb_reserve(struct i915_execbuffer *eb) bool need_fence, need_mappable; vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); - obj = vma->obj; entry = vma->exec_entry; if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) @@ -888,9 +875,6 @@ static int eb_reserve(struct i915_execbuffer *eb) list_move(&vma->exec_list, &ordered_vmas); } else list_move_tail(&vma->exec_list, &ordered_vmas); - - obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; - obj->base.pending_write_domain = 0; } list_splice(&ordered_vmas, &eb->vmas); list_splice(&pinned_vmas, &eb->vmas); @@ -1085,7 +1069,9 @@ eb_move_to_gpu(struct i915_execbuffer *eb) struct drm_i915_gem_object *obj = vma->obj; if (obj->flags & other_rings) { - ret = i915_gem_object_sync(obj, eb->request); + ret = i915_gem_object_sync(obj, + eb->request, + vma->exec_entry->flags & EXEC_OBJECT_WRITE); if (ret) return ret; } @@ -1248,12 +1234,10 @@ eb_move_to_active(struct i915_execbuffer *eb) u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; - obj->base.write_domain = obj->base.pending_write_domain; - if (obj->base.write_domain) - vma->exec_entry->flags |= EXEC_OBJECT_WRITE; - else - obj->base.pending_read_domains |= obj->base.read_domains; - obj->base.read_domains = obj->base.pending_read_domains; + obj->base.write_domain = 0; + if (vma->exec_entry->flags & EXEC_OBJECT_WRITE) + obj->base.read_domains = 0; + obj->base.read_domains |= I915_GEM_GPU_DOMAINS; i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); @@ -1598,7 +1582,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (eb.batch_vma->obj->base.pending_write_domain) { + if (eb.batch_vma->exec_entry->flags & EXEC_OBJECT_WRITE) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; @@ -1630,8 +1614,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } - eb.batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; - /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 13d283e4b0a3..e518d3300a3e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11670,7 +11670,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, goto cleanup_pending; } - ret = i915_gem_object_sync(obj, request); + ret = i915_gem_object_sync(obj, request, false); if (ret) goto cleanup_request; } -- 2.7.0.rc3 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx