Combine the two slightly overlapping parameter structures we pass around
the execbuffer routines into one.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 557 ++++++++++++-----------------
 1 file changed, 235 insertions(+), 322 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5689445b1cd3..7cb5b9ad9212 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -49,70 +49,73 @@
 #define BATCH_OFFSET_BIAS (256*1024)

-struct i915_execbuffer_params {
-        struct drm_device *dev;
-        struct drm_file *file;
-        struct i915_vma *batch;
-        u32 dispatch_flags;
-        u32 args_batch_start_offset;
-        struct intel_engine_cs *engine;
-        struct i915_gem_context *ctx;
-        struct drm_i915_gem_request *request;
-};
-
-struct eb_vmas {
+struct i915_execbuffer {
         struct drm_i915_private *i915;
+        struct drm_file *file;
+        struct drm_i915_gem_execbuffer2 *args;
+        struct drm_i915_gem_exec_object2 *exec;
+        struct intel_engine_cs *engine;
+        struct i915_gem_context *ctx;
+        struct i915_address_space *vm;
+        struct i915_vma *batch;
+        struct drm_i915_gem_request *request;
+        u32 batch_start_offset;
+        unsigned int dispatch_flags;
+        struct drm_i915_gem_exec_object2 shadow_exec_entry;
+        bool need_relocs;
         struct list_head vmas;
+        struct reloc_cache {
+                struct drm_mm_node node;
+                unsigned long vaddr;
+                unsigned int page;
+                bool use_64bit_reloc;
+        } reloc_cache;
         int and;
         union {
-                struct i915_vma *lut[0];
-                struct hlist_head buckets[0];
+                struct i915_vma **lut;
+                struct hlist_head *buckets;
         };
 };

-static struct eb_vmas *
-eb_create(struct drm_i915_private *i915,
-          struct drm_i915_gem_execbuffer2 *args)
+static int
+eb_create(struct i915_execbuffer *eb)
 {
-        struct eb_vmas *eb = NULL;
-
-        if (args->flags & I915_EXEC_HANDLE_LUT) {
-                unsigned size = args->buffer_count;
+        eb->lut = NULL;
+        if (eb->args->flags & I915_EXEC_HANDLE_LUT) {
+                unsigned int size = eb->args->buffer_count;
                 size *= sizeof(struct i915_vma *);
-                size += sizeof(struct eb_vmas);
-                eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+                eb->lut = kmalloc(size,
+                                  GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
         }

-        if (eb == NULL) {
-                unsigned size = args->buffer_count;
-                unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
+        if (!eb->lut) {
+                unsigned int size = eb->args->buffer_count;
+                unsigned int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
                 BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
                 while (count > 2*size)
                         count >>= 1;
-                eb = kzalloc(count*sizeof(struct hlist_head) +
-                             sizeof(struct eb_vmas),
-                             GFP_TEMPORARY);
-                if (eb == NULL)
-                        return eb;
+                eb->lut = kzalloc(count*sizeof(struct hlist_head),
+                                  GFP_TEMPORARY);
+                if (!eb->lut)
+                        return -ENOMEM;

                 eb->and = count - 1;
         } else
-                eb->and = -args->buffer_count;

-        eb->i915 = i915;
         INIT_LIST_HEAD(&eb->vmas);
-        return eb;
+        return 0;
 }

 static void
-eb_reset(struct eb_vmas *eb)
+eb_reset(struct i915_execbuffer *eb)
 {
         if (eb->and >= 0)
                 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }

 static struct i915_vma *
-eb_get_batch(struct eb_vmas *eb)
+eb_get_batch(struct i915_execbuffer *eb)
 {
         struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);

@@ -132,41 +135,37 @@ eb_get_batch(struct eb_vmas *eb)
 }

 static int
-eb_lookup_vmas(struct eb_vmas *eb,
-               struct drm_i915_gem_exec_object2 *exec,
-               const struct drm_i915_gem_execbuffer2 *args,
-               struct 
i915_address_space *vm, - struct drm_file *file) +eb_lookup_vmas(struct i915_execbuffer *eb) { struct drm_i915_gem_object *obj; struct list_head objects; int i, ret; INIT_LIST_HEAD(&objects); - spin_lock(&file->table_lock); + spin_lock(&eb->file->table_lock); /* Grab a reference to the object and release the lock so we can lookup * or create the VMA without using GFP_ATOMIC */ - for (i = 0; i < args->buffer_count; i++) { - obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); + for (i = 0; i < eb->args->buffer_count; i++) { + obj = to_intel_bo(idr_find(&eb->file->object_idr, eb->exec[i].handle)); if (obj == NULL) { - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); DRM_DEBUG("Invalid object handle %d at index %d\n", - exec[i].handle, i); + eb->exec[i].handle, i); ret = -ENOENT; goto err; } if (!list_empty(&obj->obj_exec_link)) { - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", - obj, exec[i].handle, i); + obj, eb->exec[i].handle, i); ret = -EINVAL; goto err; } list_add_tail(&obj->obj_exec_link, &objects); } - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); i = 0; while (!list_empty(&objects)) { @@ -184,7 +183,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL); + vma = i915_gem_obj_lookup_or_create_vma(obj, eb->vm, NULL); if (unlikely(IS_ERR(vma))) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -195,11 +194,13 @@ eb_lookup_vmas(struct eb_vmas *eb, list_add_tail(&vma->exec_list, &eb->vmas); list_del_init(&obj->obj_exec_link); - vma->exec_entry = &exec[i]; + vma->exec_entry = &eb->exec[i]; if (eb->and < 0) { eb->lut[i] = vma; } else { - uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; + u32 handle = + eb->args->flags & I915_EXEC_HANDLE_LUT ? 
+ i : eb->exec[i].handle; vma->exec_handle = handle; hlist_add_head(&vma->exec_node, &eb->buckets[handle & eb->and]); @@ -226,7 +227,7 @@ err: return ret; } -static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) +static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) { if (eb->and < 0) { if (handle >= -eb->and) @@ -246,7 +247,7 @@ static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) } static void -i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) +eb_unreserve_vma(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry; @@ -264,8 +265,10 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } -static void eb_destroy(struct eb_vmas *eb) +static void eb_destroy(struct i915_execbuffer *eb) { + i915_gem_context_put(eb->ctx); + while (!list_empty(&eb->vmas)) { struct i915_vma *vma; @@ -273,9 +276,8 @@ static void eb_destroy(struct eb_vmas *eb) struct i915_vma, exec_list); list_del_init(&vma->exec_list); - i915_gem_execbuffer_unreserve_vma(vma); + eb_unreserve_vma(vma); } - kfree(eb); } static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) @@ -316,21 +318,12 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target_offset); } -struct reloc_cache { - struct drm_i915_private *i915; - struct drm_mm_node node; - unsigned long vaddr; - unsigned int page; - bool use_64bit_reloc; -}; - static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { cache->page = -1; cache->vaddr = 0; - cache->i915 = i915; - cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8; + cache->use_64bit_reloc = INTEL_GEN(i915) >= 8; cache->node.allocated = false; } @@ -346,7 +339,14 @@ static inline unsigned int unmask_flags(unsigned long p) #define KMAP 0x4 /* after CLFLUSH_FLAGS */ -static void reloc_cache_fini(struct reloc_cache *cache) +static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) +{ + struct drm_i915_private *i915 = + container_of(cache, struct i915_execbuffer, reloc_cache)->i915; + return &i915->ggtt; +} + +static void reloc_cache_reset(struct reloc_cache *cache) { void *vaddr; @@ -364,7 +364,7 @@ static void reloc_cache_fini(struct reloc_cache *cache) wmb(); io_mapping_unmap_atomic((void __iomem *)vaddr); if (cache->node.allocated) { - struct i915_ggtt *ggtt = &cache->i915->ggtt; + struct i915_ggtt *ggtt = cache_to_ggtt(cache); ggtt->base.clear_range(&ggtt->base, cache->node.start, @@ -375,6 +375,9 @@ static void reloc_cache_fini(struct reloc_cache *cache) i915_vma_unpin((struct i915_vma *)cache->node.mm); } } + + cache->vaddr = 0; + cache->page = -1; } static void *reloc_kmap(struct drm_i915_gem_object *obj, @@ -413,7 +416,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, struct reloc_cache *cache, int page) { - struct i915_ggtt *ggtt = &cache->i915->ggtt; + struct i915_ggtt *ggtt = cache_to_ggtt(cache); unsigned long offset; void *vaddr; @@ -472,7 +475,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset += page << PAGE_SHIFT; } - vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset); + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); cache->page = page; cache->vaddr = (unsigned long)vaddr; @@ -565,12 +568,10 @@ static bool object_is_idle(struct drm_i915_gem_object *obj) } static int -i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, - struct eb_vmas *eb, 
- struct drm_i915_gem_relocation_entry *reloc, - struct reloc_cache *cache) +eb_relocate_entry(struct drm_i915_gem_object *obj, + struct i915_execbuffer *eb, + struct drm_i915_gem_relocation_entry *reloc) { - struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_i915_obj; struct i915_vma *target_vma; @@ -589,8 +590,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and * pipe_control writes because the gpu doesn't properly redirect them * through the ppgtt for non_secure batchbuffers. */ - if (unlikely(IS_GEN6(dev) && - reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { + if (unlikely(IS_GEN6(eb->i915) && + reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { ret = i915_vma_bind(target_vma, target_i915_obj->cache_level, PIN_GLOBAL); if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) @@ -631,7 +632,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) { + obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, @@ -651,7 +652,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (pagefault_disabled() && !object_is_idle(obj)) return -EFAULT; - ret = relocate_entry(obj, reloc, cache, target_offset); + ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset); if (ret) return ret; @@ -660,19 +661,15 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return 0; } -static int -i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, - struct eb_vmas *eb) +static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct reloc_cache cache; int remain, ret = 0; user_relocs = u64_to_user_ptr(entry->relocs_ptr); - reloc_cache_init(&cache, eb->i915); remain = entry->relocation_count; while (remain) { @@ -690,7 +687,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache); + ret = eb_relocate_entry(vma->obj, eb, r); if (ret) goto out; @@ -707,33 +704,29 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, } out: - reloc_cache_fini(&cache); + reloc_cache_reset(&eb->reloc_cache); return ret; #undef N_RELOC } static int -i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, - struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *relocs) +eb_relocate_vma_slow(struct i915_vma *vma, + struct i915_execbuffer *eb, + struct drm_i915_gem_relocation_entry *relocs) { const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct reloc_cache cache; int i, ret = 0; - reloc_cache_init(&cache, eb->i915); for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache); + ret = eb_relocate_entry(vma->obj, eb, &relocs[i]); if (ret) break; } - reloc_cache_fini(&cache); - + reloc_cache_reset(&eb->reloc_cache); return ret; } -static int -i915_gem_execbuffer_relocate(struct eb_vmas *eb) 
+static int eb_relocate(struct i915_execbuffer *eb) { struct i915_vma *vma; int ret = 0; @@ -747,7 +740,7 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) */ pagefault_disable(); list_for_each_entry(vma, &eb->vmas, exec_list) { - ret = i915_gem_execbuffer_relocate_vma(vma, eb); + ret = eb_relocate_vma(vma, eb); if (ret) break; } @@ -763,9 +756,9 @@ static bool only_mappable_for_reloc(unsigned int flags) } static int -i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, - struct intel_engine_cs *engine, - bool *need_reloc) +eb_reserve_vma(struct i915_vma *vma, + struct intel_engine_cs *engine, + bool *need_reloc) { struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; @@ -885,33 +878,26 @@ eb_vma_misplaced(struct i915_vma *vma) return false; } -static int -i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, - struct list_head *vmas, - struct i915_gem_context *ctx, - bool *need_relocs) +static int eb_reserve(struct i915_execbuffer *eb) { + const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4; struct drm_i915_gem_object *obj; struct i915_vma *vma; - struct i915_address_space *vm; struct list_head ordered_vmas; struct list_head pinned_vmas; - bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; int retry; - vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; - INIT_LIST_HEAD(&ordered_vmas); INIT_LIST_HEAD(&pinned_vmas); - while (!list_empty(vmas)) { + while (!list_empty(&eb->vmas)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; - vma = list_first_entry(vmas, struct i915_vma, exec_list); + vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); obj = vma->obj; entry = vma->exec_entry; - if (ctx->flags & CONTEXT_NO_ZEROMAP) + if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; if (!has_fenced_gpu_access) @@ -932,8 +918,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; } - list_splice(&ordered_vmas, vmas); - list_splice(&pinned_vmas, vmas); + list_splice(&ordered_vmas, &eb->vmas); + list_splice(&pinned_vmas, &eb->vmas); /* Attempt to pin all of the buffers into the GTT. * This is done in 3 phases: @@ -952,27 +938,24 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, int ret = 0; /* Unbind any ill-fitting objects or pin. 
*/ - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { if (!drm_mm_node_allocated(&vma->node)) continue; if (eb_vma_misplaced(vma)) ret = i915_vma_unbind(vma); else - ret = i915_gem_execbuffer_reserve_vma(vma, - engine, - need_relocs); + ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); if (ret) goto err; } /* Bind fresh objects */ - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { if (drm_mm_node_allocated(&vma->node)) continue; - ret = i915_gem_execbuffer_reserve_vma(vma, engine, - need_relocs); + ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); if (ret) goto err; } @@ -982,46 +965,37 @@ err: return ret; /* Decrement pin count for bound objects */ - list_for_each_entry(vma, vmas, exec_list) - i915_gem_execbuffer_unreserve_vma(vma); + list_for_each_entry(vma, &eb->vmas, exec_list) + eb_unreserve_vma(vma); - ret = i915_gem_evict_vm(vm, true); + ret = i915_gem_evict_vm(eb->vm, true); if (ret) return ret; } while (1); } static int -i915_gem_execbuffer_relocate_slow(struct drm_device *dev, - struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file, - struct intel_engine_cs *engine, - struct eb_vmas *eb, - struct drm_i915_gem_exec_object2 *exec, - struct i915_gem_context *ctx) +eb_relocate_slow(struct i915_execbuffer *eb) { + const unsigned int count = eb->args->buffer_count; + struct drm_device *dev = &eb->i915->drm; struct drm_i915_gem_relocation_entry *reloc; - struct i915_address_space *vm; struct i915_vma *vma; - bool need_relocs; int *reloc_offset; int i, total, ret; - unsigned count = args->buffer_count; - - vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; /* We may process another execbuffer during the unlock... */ while (!list_empty(&eb->vmas)) { vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); - i915_gem_execbuffer_unreserve_vma(vma); + eb_unreserve_vma(vma); } mutex_unlock(&dev->struct_mutex); total = 0; for (i = 0; i < count; i++) - total += exec[i].relocation_count; + total += eb->exec[i].relocation_count; reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset)); reloc = drm_malloc_ab(total, sizeof(*reloc)); @@ -1038,10 +1012,10 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, u64 invalid_offset = (u64)-1; int j; - user_relocs = u64_to_user_ptr(exec[i].relocs_ptr); + user_relocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); if (copy_from_user(reloc+total, user_relocs, - exec[i].relocation_count * sizeof(*reloc))) { + eb->exec[i].relocation_count * sizeof(*reloc))) { ret = -EFAULT; mutex_lock(&dev->struct_mutex); goto err; @@ -1056,7 +1030,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, * happened we would make the mistake of assuming that the * relocations were valid. 
*/ - for (j = 0; j < exec[i].relocation_count; j++) { + for (j = 0; j < eb->exec[i].relocation_count; j++) { if (__copy_to_user(&user_relocs[j].presumed_offset, &invalid_offset, sizeof(invalid_offset))) { @@ -1067,7 +1041,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, } reloc_offset[i] = total; - total += exec[i].relocation_count; + total += eb->exec[i].relocation_count; } ret = i915_mutex_lock_interruptible(dev); @@ -1078,20 +1052,18 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, /* reacquire the objects */ eb_reset(eb); - ret = eb_lookup_vmas(eb, exec, args, vm, file); + ret = eb_lookup_vmas(eb); if (ret) goto err; - need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx, - &need_relocs); + ret = eb_reserve(eb); if (ret) goto err; list_for_each_entry(vma, &eb->vmas, exec_list) { - int offset = vma->exec_entry - exec; - ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb, - reloc + reloc_offset[offset]); + int idx = vma->exec_entry - eb->exec; + + ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[idx]); if (ret) goto err; } @@ -1108,29 +1080,28 @@ err: return ret; } -static unsigned int eb_other_engines(struct drm_i915_gem_request *req) +static unsigned int eb_other_engines(struct i915_execbuffer *eb) { unsigned int mask; - mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; + mask = ~intel_engine_flag(eb->engine) & I915_BO_ACTIVE_MASK; mask <<= I915_BO_ACTIVE_SHIFT; return mask; } static int -i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, - struct list_head *vmas) +eb_move_to_gpu(struct i915_execbuffer *eb) { - const unsigned int other_rings = eb_other_engines(req); + const unsigned int other_rings = eb_other_engines(eb); struct i915_vma *vma; int ret; - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; if (obj->flags & other_rings) { - ret = i915_gem_object_sync(obj, req); + ret = i915_gem_object_sync(obj, eb->request); if (ret) return ret; } @@ -1140,10 +1111,10 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, } /* Unconditionally flush any chipset caches (for streaming writes). */ - i915_gem_chipset_flush(req->engine->i915); + i915_gem_chipset_flush(eb->i915); /* Unconditionally invalidate GPU caches and TLBs. */ - return req->engine->emit_flush(req, EMIT_INVALIDATE); + return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); } static bool @@ -1246,24 +1217,25 @@ validate_exec_list(struct drm_device *dev, return 0; } -static struct i915_gem_context * -i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *engine, const u32 ctx_id) +static int eb_select_context(struct i915_execbuffer *eb) { struct i915_gem_context *ctx; - struct i915_ctx_hang_stats *hs; + unsigned int ctx_id; - ctx = i915_gem_context_lookup(file->driver_priv, ctx_id); - if (IS_ERR(ctx)) - return ctx; + ctx_id = i915_execbuffer2_get_context_id(*eb->args); + ctx = i915_gem_context_lookup(eb->file->driver_priv, ctx_id); + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); - hs = &ctx->hang_stats; - if (hs->banned) { + if (unlikely(ctx->hang_stats.banned)) { DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); - return ERR_PTR(-EIO); + return -EIO; } - return ctx; + eb->ctx = i915_gem_context_get(ctx); + eb->vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &eb->i915->ggtt.base; + + return 0; } void i915_vma_move_to_active(struct i915_vma *vma, @@ -1325,12 +1297,11 @@ static void eb_export_fence(struct drm_i915_gem_object *obj, } static void -i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct drm_i915_gem_request *req) +eb_move_to_active(struct i915_execbuffer *eb) { struct i915_vma *vma; - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; @@ -1342,8 +1313,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, req, vma->exec_entry->flags); - eb_export_fence(obj, req, vma->exec_entry->flags); + i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags); + eb_export_fence(obj, eb->request, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); } } @@ -1374,29 +1345,22 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return 0; } -static struct i915_vma * -i915_gem_execbuffer_parse(struct intel_engine_cs *engine, - struct drm_i915_gem_exec_object2 *shadow_exec_entry, - struct drm_i915_gem_object *batch_obj, - struct eb_vmas *eb, - u32 batch_start_offset, - u32 batch_len, - bool is_master) +static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; int ret; - shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, - PAGE_ALIGN(batch_len)); + shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, + PAGE_ALIGN(eb->args->batch_len)); if (IS_ERR(shadow_batch_obj)) return ERR_CAST(shadow_batch_obj); - ret = intel_engine_cmd_parser(engine, - batch_obj, + ret = intel_engine_cmd_parser(eb->engine, + eb->batch->obj, shadow_batch_obj, - batch_start_offset, - batch_len, + eb->args->batch_start_offset, + eb->args->batch_len, is_master); if (ret) { if (ret == -EACCES) /* unhandled chained batch */ @@ -1410,9 +1374,8 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, if (IS_ERR(vma)) goto out; - memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); - - vma->exec_entry = shadow_exec_entry; + vma->exec_entry = + memset(&eb->shadow_exec_entry, 0, sizeof(*vma->exec_entry)); vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; list_add_tail(&vma->exec_list, &eb->vmas); @@ -1430,49 +1393,45 @@ add_to_client(struct drm_i915_gem_request *req, } static int -execbuf_submit(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) +execbuf_submit(struct i915_execbuffer *eb) { - struct drm_i915_private *dev_priv = params->request->i915; - u64 exec_start, exec_len; int instp_mode; u32 instp_mask; int ret; - ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); + ret = eb_move_to_gpu(eb); if (ret) return ret; - ret = i915_switch_context(params->request); + ret = i915_switch_context(eb->request); if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; + instp_mode = eb->args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->engine->id != RCS) { + if (instp_mode != 0 && eb->engine->id != RCS) 
{ DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } - if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev_priv)->gen < 4) { + if (instp_mode != eb->i915->relative_constants_mode) { + if (INTEL_INFO(eb->i915)->gen < 4) { DRM_DEBUG("no rel constants on pre-gen4\n"); return -EINVAL; } - if (INTEL_INFO(dev_priv)->gen > 5 && + if (INTEL_INFO(eb->i915)->gen > 5 && instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); return -EINVAL; } /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev_priv)->gen >= 6) + if (INTEL_INFO(eb->i915)->gen >= 6) instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; @@ -1481,11 +1440,11 @@ execbuf_submit(struct i915_execbuffer_params *params, return -EINVAL; } - if (params->engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - struct intel_ring *ring = params->request->ring; + if (eb->engine->id == RCS && + instp_mode != eb->i915->relative_constants_mode) { + struct intel_ring *ring = eb->request->ring; - ret = intel_ring_begin(params->request, 4); + ret = intel_ring_begin(eb->request, 4); if (ret) return ret; @@ -1495,32 +1454,27 @@ execbuf_submit(struct i915_execbuffer_params *params, intel_ring_emit(ring, instp_mask << 16 | instp_mode); intel_ring_advance(ring); - dev_priv->relative_constants_mode = instp_mode; + eb->i915->relative_constants_mode = instp_mode; } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(params->request); + if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { + ret = i915_reset_gen7_sol_offsets(eb->request); if (ret) return ret; } - exec_len = args->batch_len; - exec_start = params->batch->node.start + - params->args_batch_start_offset; - - if (exec_len == 0) - exec_len = params->batch->size - params->args_batch_start_offset; - - ret = params->engine->emit_bb_start(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = eb->engine->emit_bb_start(eb->request, + eb->batch->node.start + + eb->batch_start_offset, + eb->args->batch_len, + eb->dispatch_flags); if (ret) return ret; - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); + trace_i915_gem_ring_dispatch(eb->request, eb->dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params->request); - add_to_client(params->request, params->file); + eb_move_to_active(eb); + add_to_client(eb->request, eb->file); return 0; } @@ -1606,24 +1560,13 @@ eb_select_engine(struct drm_i915_private *dev_priv, } static int -i915_gem_do_execbuffer(struct drm_device *dev, void *data, +i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct drm_i915_gem_exec_object2 *exec) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct eb_vmas *eb; - struct drm_i915_gem_exec_object2 shadow_exec_entry; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct i915_execbuffer_params params_master; /* XXX: will be removed later */ - struct i915_execbuffer_params *params = ¶ms_master; - const u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u32 dispatch_flags; + struct i915_execbuffer eb; int ret; - bool need_relocs; if (!i915_gem_check_execbuffer(args)) return -EINVAL; @@ -1632,37 +1575,39 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) return ret; - dispatch_flags = 0; + eb.i915 = to_i915(dev); + eb.file = file; + 
eb.args = args; + eb.exec = exec; + eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; + reloc_cache_init(&eb.reloc_cache, eb.i915); + + eb.dispatch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) return -EPERM; - dispatch_flags |= I915_DISPATCH_SECURE; + eb.dispatch_flags |= I915_DISPATCH_SECURE; } if (args->flags & I915_EXEC_IS_PINNED) - dispatch_flags |= I915_DISPATCH_PINNED; - - engine = eb_select_engine(dev_priv, file, args); - if (!engine) - return -EINVAL; + eb.dispatch_flags |= I915_DISPATCH_PINNED; - if (args->buffer_count < 1) { - DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); + eb.engine = eb_select_engine(eb.i915, file, args); + if (!eb.engine) return -EINVAL; - } if (args->flags & I915_EXEC_RESOURCE_STREAMER) { - if (!HAS_RESOURCE_STREAMER(dev)) { + if (!HAS_RESOURCE_STREAMER(eb.i915)) { DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); return -EINVAL; } - if (engine->id != RCS) { + if (eb.engine->id != RCS) { DRM_DEBUG("RS is not available on %s\n", - engine->name); + eb.engine->name); return -EINVAL; } - dispatch_flags |= I915_DISPATCH_RS; + eb.dispatch_flags |= I915_DISPATCH_RS; } /* Take a local wakeref for preparing to dispatch the execbuf as @@ -1671,59 +1616,44 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - intel_runtime_pm_get(dev_priv); + intel_runtime_pm_get(eb.i915); ret = i915_mutex_lock_interruptible(dev); if (ret) goto pre_mutex_err; - ctx = i915_gem_validate_context(dev, file, engine, ctx_id); - if (IS_ERR(ctx)) { + ret = eb_select_context(&eb); + if (ret) { mutex_unlock(&dev->struct_mutex); - ret = PTR_ERR(ctx); goto pre_mutex_err; } - i915_gem_context_get(ctx); - - if (ctx->ppgtt) - vm = &ctx->ppgtt->base; - else - vm = &ggtt->base; - - memset(¶ms_master, 0x00, sizeof(params_master)); - - eb = eb_create(dev_priv, args); - if (eb == NULL) { - i915_gem_context_put(ctx); + if (eb_create(&eb)) { + i915_gem_context_put(eb.ctx); mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; goto pre_mutex_err; } /* Look up object handles */ - ret = eb_lookup_vmas(eb, exec, args, vm, file); + ret = eb_lookup_vmas(&eb); if (ret) goto err; /* take note of the batch buffer before we might reorder the lists */ - params->batch = eb_get_batch(eb); + eb.batch = eb_get_batch(&eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ - need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx, - &need_relocs); + ret = eb_reserve(&eb); if (ret) goto err; /* The objects are in their final locations, apply the relocations. 
*/ - if (need_relocs) - ret = i915_gem_execbuffer_relocate(eb); + if (eb.need_relocs) + ret = eb_relocate(&eb); if (ret) { if (ret == -EFAULT) { - ret = i915_gem_execbuffer_relocate_slow(dev, args, file, - engine, - eb, exec, ctx); + ret = eb_relocate_slow(&eb); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } if (ret) @@ -1731,28 +1661,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (params->batch->obj->base.pending_write_domain) { + if (eb.batch->obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } - if (args->batch_start_offset > params->batch->size || - args->batch_len > params->batch->size - args->batch_start_offset) { + if (args->batch_start_offset > eb.batch->size || + args->batch_len > eb.batch->size - args->batch_start_offset) { DRM_DEBUG("Attempting to use out-of-bounds batch\n"); ret = -EINVAL; goto err; } - params->args_batch_start_offset = args->batch_start_offset; - if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { + eb.batch_start_offset = args->batch_start_offset; + if (intel_engine_needs_cmd_parser(eb.engine) && args->batch_len) { struct i915_vma *vma; - vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry, - params->batch->obj, - eb, - args->batch_start_offset, - args->batch_len, - drm_is_current_master(file)); + vma = eb_parse(&eb, drm_is_current_master(file)); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -1768,19 +1693,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * specifically don't want that set on batches the * command parser has accepted. */ - dispatch_flags |= I915_DISPATCH_SECURE; - params->args_batch_start_offset = 0; - params->batch = vma; + eb.dispatch_flags |= I915_DISPATCH_SECURE; + eb.batch_start_offset = 0; + eb.batch = vma; } } - params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + eb.batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + if (args->batch_len == 0) + args->batch_len = eb.batch->size - eb.batch_start_offset; /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (dispatch_flags & I915_DISPATCH_SECURE) { - struct drm_i915_gem_object *obj = params->batch->obj; + if (eb.dispatch_flags & I915_DISPATCH_SECURE) { + struct drm_i915_gem_object *obj = eb.batch->obj; struct i915_vma *vma; /* @@ -1799,13 +1726,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - params->batch = vma; + eb.batch = vma; } /* Allocate a request for this batch buffer nice and early. */ - params->request = i915_gem_request_alloc(engine, ctx); - if (IS_ERR(params->request)) { - ret = PTR_ERR(params->request); + eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); + if (IS_ERR(eb.request)) { + ret = PTR_ERR(eb.request); goto err_batch_unpin; } @@ -1815,22 +1742,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. */ - params->request->batch = params->batch; + eb.request->batch = eb.batch; - /* - * Save assorted stuff away to pass through to *_submission(). - * NB: This data should be 'persistent' and not local as it will - * kept around beyond the duration of the IOCTL once the GPU - * scheduler arrives. 
-         */
-        params->dev = dev;
-        params->file = file;
-        params->engine = engine;
-        params->dispatch_flags = dispatch_flags;
-        params->ctx = ctx;
-
-        ret = execbuf_submit(params, args, &eb->vmas);
-        __i915_add_request(params->request, ret == 0);
+        ret = execbuf_submit(&eb);
+        __i915_add_request(eb.request, ret == 0);

 err_batch_unpin:
         /*
@@ -1839,19 +1754,17 @@ err_batch_unpin:
          * FIXME: We crucially rely upon the active tracking for the (ppgtt)
          * batch vma for correctness. For less ugly and less fragility this
          * needs to be adjusted to also track the ggtt batch vma properly as
          * active.
          */
-        if (dispatch_flags & I915_DISPATCH_SECURE)
-                i915_vma_unpin(params->batch);
+        if (eb.dispatch_flags & I915_DISPATCH_SECURE)
+                i915_vma_unpin(eb.batch);

 err:
         /* the request owns the ref now */
-        i915_gem_context_put(ctx);
-        eb_destroy(eb);
-
+        eb_destroy(&eb);
         mutex_unlock(&dev->struct_mutex);

 pre_mutex_err:
         /* intel_gpu_busy should also get a ref, so it will free when the device
          * is really idle. */
-        intel_runtime_pm_put(dev_priv);
+        intel_runtime_pm_put(eb.i915);

         return ret;
 }

@@ -1918,7 +1831,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
         exec2.flags = I915_EXEC_RENDER;
         i915_execbuffer2_set_context_id(exec2, 0);

-        ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
+        ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
         if (!ret) {
                 struct drm_i915_gem_exec_object __user *user_exec_list =
                         u64_to_user_ptr(args->buffers_ptr);

@@ -1982,7 +1895,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
                 return -EFAULT;
         }

-        ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
+        ret = i915_gem_do_execbuffer(dev, file, args, exec2_list);
         if (!ret) {
                 /* Copy the new buffer offsets back to the user's exec list. */
                 struct drm_i915_gem_exec_object2 __user *user_exec_list =
-- 
2.9.3
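Aside for readers skimming the diff (not part of the patch): the refactor boils down to one pattern — the ioctl entry point fills in a single on-stack execbuffer state once, and every helper receives a pointer to that state instead of its own overlapping slice of parameters. Below is a minimal, self-contained userspace C sketch of that pattern, using made-up names (demo_execbuffer, demo_relocate, demo_submit) rather than the driver's real types.

#include <stdbool.h>
#include <stdio.h>

/* Single state block, analogous in spirit to the patch's struct i915_execbuffer. */
struct demo_execbuffer {
        const char *engine_name;         /* invariant inputs, set once */
        unsigned int batch_start_offset;
        unsigned int dispatch_flags;
        bool need_relocs;
        unsigned int reloc_count;        /* state shared between helpers */
};

/* Helpers take only the state block instead of long parameter lists. */
static int demo_relocate(struct demo_execbuffer *eb)
{
        if (!eb->need_relocs)
                return 0;
        eb->reloc_count++;               /* pretend we processed one relocation */
        return 0;
}

static int demo_submit(const struct demo_execbuffer *eb)
{
        printf("submit on %s: offset=%u flags=%#x relocs=%u\n",
               eb->engine_name, eb->batch_start_offset,
               eb->dispatch_flags, eb->reloc_count);
        return 0;
}

int main(void)
{
        /* The entry point initialises the state exactly once... */
        struct demo_execbuffer eb = {
                .engine_name = "rcs0",
                .need_relocs = true,
        };

        /* ...and simply hands it to each stage in turn. */
        if (demo_relocate(&eb))
                return 1;
        return demo_submit(&eb);
}

The patch applies the same idea in-kernel: i915_gem_do_execbuffer() populates struct i915_execbuffer and the eb_*() helpers consume it, which is what removes the old i915_execbuffer_params/eb_vmas split.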