Combine the two slightly overlapping parameter structures we pass around the execbuffer routines into one. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 539 +++++++++++++---------------- 1 file changed, 231 insertions(+), 308 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7014523cd890..2a535eb35dff 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -43,70 +43,71 @@ #define BATCH_OFFSET_BIAS (256*1024) -struct i915_execbuffer_params { - struct drm_device *dev; - struct drm_file *file; - struct i915_vma *batch_vma; - uint32_t dispatch_flags; - uint32_t args_batch_start_offset; - struct intel_engine_cs *ring; - struct intel_context *ctx; - struct drm_i915_gem_request *request; -}; - -struct eb_vmas { +struct i915_execbuffer { struct drm_i915_private *i915; + struct drm_file *file; + struct drm_i915_gem_execbuffer2 *args; + struct drm_i915_gem_exec_object2 *exec; + struct intel_engine_cs *engine; + struct intel_context *ctx; + struct i915_address_space *vm; + struct i915_vma *batch_vma; + uint32_t batch_start_offset; + struct drm_i915_gem_request *request; + unsigned dispatch_flags; + bool need_relocs; struct list_head vmas; + struct reloc_cache { + unsigned long vaddr; + unsigned page; + struct drm_mm_node node; + bool use_64bit_reloc; + } reloc_cache; int and; union { - struct i915_vma *lut[0]; - struct hlist_head buckets[0]; + struct i915_vma **lut; + struct hlist_head *buckets; }; }; -static struct eb_vmas * -eb_create(struct drm_i915_private *i915, - struct drm_i915_gem_execbuffer2 *args) +static int +eb_create(struct i915_execbuffer *eb) { - struct eb_vmas *eb = NULL; - - if (args->flags & I915_EXEC_HANDLE_LUT) { - unsigned size = args->buffer_count; + eb->lut = NULL; + if (eb->args->flags & I915_EXEC_HANDLE_LUT) { + unsigned size = eb->args->buffer_count; size *= sizeof(struct i915_vma *); - size += sizeof(struct eb_vmas); - eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + eb->lut = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); } - if (eb == NULL) { - unsigned size = args->buffer_count; + if (eb->lut == NULL) { + unsigned size = eb->args->buffer_count; unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2; BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head)); while (count > 2*size) count >>= 1; - eb = kzalloc(count*sizeof(struct hlist_head) + - sizeof(struct eb_vmas), - GFP_TEMPORARY); - if (eb == NULL) - return eb; + eb->lut = kzalloc(count*sizeof(struct hlist_head), + GFP_TEMPORARY); + if (eb->lut == NULL) + return -ENOMEM; eb->and = count - 1; } else - eb->and = -args->buffer_count; + eb->and = -eb->args->buffer_count; - eb->i915 = i915; INIT_LIST_HEAD(&eb->vmas); - return eb; + return 0; } static void -eb_reset(struct eb_vmas *eb) +eb_reset(struct i915_execbuffer *eb) { if (eb->and >= 0) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } static struct i915_vma * -eb_get_batch(struct eb_vmas *eb) +eb_get_batch(struct i915_execbuffer *eb) { struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); @@ -126,41 +127,37 @@ eb_get_batch(struct eb_vmas *eb) } static int -eb_lookup_vmas(struct eb_vmas *eb, - struct drm_i915_gem_exec_object2 *exec, - const struct drm_i915_gem_execbuffer2 *args, - struct i915_address_space *vm, - struct drm_file *file) +eb_lookup_vmas(struct i915_execbuffer *eb) { struct drm_i915_gem_object 
*obj; struct list_head objects; int i, ret; INIT_LIST_HEAD(&objects); - spin_lock(&file->table_lock); + spin_lock(&eb->file->table_lock); /* Grab a reference to the object and release the lock so we can lookup * or create the VMA without using GFP_ATOMIC */ - for (i = 0; i < args->buffer_count; i++) { - obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); + for (i = 0; i < eb->args->buffer_count; i++) { + obj = to_intel_bo(idr_find(&eb->file->object_idr, eb->exec[i].handle)); if (obj == NULL) { - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); DRM_DEBUG("Invalid object handle %d at index %d\n", - exec[i].handle, i); + eb->exec[i].handle, i); ret = -ENOENT; goto err; } if (!list_empty(&obj->obj_exec_link)) { - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", - obj, exec[i].handle, i); + obj, eb->exec[i].handle, i); ret = -EINVAL; goto err; } list_add_tail(&obj->obj_exec_link, &objects); } - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); i = 0; while (!list_empty(&objects)) { @@ -178,7 +175,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL); + vma = i915_gem_obj_lookup_or_create_vma(obj, eb->vm, NULL); if (unlikely(IS_ERR(vma))) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -189,11 +186,13 @@ eb_lookup_vmas(struct eb_vmas *eb, list_add_tail(&vma->exec_list, &eb->vmas); list_del_init(&obj->obj_exec_link); - vma->exec_entry = &exec[i]; + vma->exec_entry = &eb->exec[i]; if (eb->and < 0) { eb->lut[i] = vma; } else { - uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; + u32 handle = + eb->args->flags & I915_EXEC_HANDLE_LUT ? 
+ i : eb->exec[i].handle; vma->exec_handle = handle; hlist_add_head(&vma->exec_node, &eb->buckets[handle & eb->and]); @@ -220,7 +219,7 @@ err: return ret; } -static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) +static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) { if (eb->and < 0) { if (handle >= -eb->and) @@ -243,7 +242,7 @@ static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) } static void -i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) +eb_unreserve_vma(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry; @@ -261,8 +260,10 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } -static void eb_destroy(struct eb_vmas *eb) +static void eb_destroy(struct i915_execbuffer *eb) { + i915_gem_context_unreference(eb->ctx); + while (!list_empty(&eb->vmas)) { struct i915_vma *vma; @@ -270,9 +271,8 @@ static void eb_destroy(struct eb_vmas *eb) struct i915_vma, exec_list); list_del_init(&vma->exec_list); - i915_gem_execbuffer_unreserve_vma(vma); + eb_unreserve_vma(vma); } - kfree(eb); } static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) @@ -308,21 +308,12 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target_offset); } -struct reloc_cache { - struct drm_i915_private *i915; - unsigned long vaddr; - unsigned page; - struct drm_mm_node node; - bool use_64bit_reloc; -}; - static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { cache->page = -1; cache->vaddr = 0; - cache->i915 = i915; - cache->use_64bit_reloc = INTEL_INFO(cache->i915)->gen >= 8; + cache->use_64bit_reloc = INTEL_INFO(i915)->gen >= 8; } static inline void *unmask_page(unsigned long p) @@ -337,7 +328,7 @@ static inline unsigned unmask_flags(unsigned long p) #define KMAP 0x4 -static void reloc_cache_fini(struct reloc_cache *cache) +static void reloc_cache_reset(struct reloc_cache *cache) { void *vaddr; @@ -355,6 +346,9 @@ static void reloc_cache_fini(struct reloc_cache *cache) io_mapping_unmap_atomic(vaddr); i915_vma_unpin((struct i915_vma *)cache->node.mm); } + + cache->vaddr = 0; + cache->page = -1; } static void *reloc_kmap(struct drm_i915_gem_object *obj, @@ -386,6 +380,14 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, return vaddr; } +static struct io_mapping * +to_io_mapping(struct reloc_cache *cache) +{ + struct drm_i915_private *i915 = + container_of(cache, struct i915_execbuffer, reloc_cache)->i915; + return &i915->gtt.mappable; +} + static void *reloc_iomap(struct drm_i915_gem_object *obj, struct reloc_cache *cache, int page) @@ -418,7 +420,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, cache->node.mm = (void *)vma; } - vaddr = io_mapping_map_atomic_wc(&cache->i915->gtt.mappable, + vaddr = io_mapping_map_atomic_wc(to_io_mapping(cache), cache->node.start + (page << PAGE_SHIFT)); cache->page = page; cache->vaddr = (unsigned long)vaddr; @@ -499,12 +501,10 @@ repeat: } static int -i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, - struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *reloc, - struct reloc_cache *cache) +eb_relocate_entry(struct drm_i915_gem_object *obj, + struct i915_execbuffer *eb, + struct drm_i915_gem_relocation_entry *reloc) { - struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_i915_obj; struct i915_vma 
*target_vma; @@ -523,7 +523,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and * pipe_control writes because the gpu doesn't properly redirect them * through the ppgtt for non_secure batchbuffers. */ - if (unlikely(IS_GEN6(dev) && + if (unlikely(IS_GEN6(eb->i915) && reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { ret = i915_vma_bind(target_vma, target_i915_obj->cache_level, PIN_GLOBAL); @@ -565,7 +565,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) { + obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, @@ -585,7 +585,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (i915_gem_object_is_active(obj) && pagefault_disabled()) return -EFAULT; - ret = relocate_entry(obj, reloc, cache, target_offset); + ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset); if (ret) return ret; @@ -594,19 +594,15 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return 0; } -static int -i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, - struct eb_vmas *eb) +static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct reloc_cache cache; int remain, ret = 0; user_relocs = to_user_ptr(entry->relocs_ptr); - reloc_cache_init(&cache, eb->i915); remain = entry->relocation_count; while (remain) { @@ -624,7 +620,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache); + ret = eb_relocate_entry(vma->obj, eb, r); if (ret) goto out; @@ -642,33 +638,29 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, } out: - reloc_cache_fini(&cache); + reloc_cache_reset(&eb->reloc_cache); return ret; #undef N_RELOC } static int -i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, - struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *relocs) +eb_relocate_vma_slow(struct i915_vma *vma, + struct i915_execbuffer *eb, + struct drm_i915_gem_relocation_entry *relocs) { const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct reloc_cache cache; int i, ret = 0; - reloc_cache_init(&cache, eb->i915); for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache); + ret = eb_relocate_entry(vma->obj, eb, &relocs[i]); if (ret) break; } - reloc_cache_fini(&cache); - + reloc_cache_reset(&eb->reloc_cache); return ret; } -static int -i915_gem_execbuffer_relocate(struct eb_vmas *eb) +static int eb_relocate(struct i915_execbuffer *eb) { struct i915_vma *vma; int ret = 0; @@ -682,7 +674,7 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) */ pagefault_disable(); list_for_each_entry(vma, &eb->vmas, exec_list) { - ret = i915_gem_execbuffer_relocate_vma(vma, eb); + ret = eb_relocate_vma(vma, eb); if (ret) break; } @@ -698,9 +690,9 @@ static bool only_mappable_for_reloc(unsigned int flags) } static int 
-i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, - struct intel_engine_cs *ring, - bool *need_reloc) +eb_reserve_vma(struct i915_vma *vma, + struct intel_engine_cs *ring, + bool *need_reloc) { struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; @@ -818,33 +810,26 @@ eb_vma_misplaced(struct i915_vma *vma) return false; } -static int -i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, - struct list_head *vmas, - struct intel_context *ctx, - bool *need_relocs) +static int eb_reserve(struct i915_execbuffer *eb) { + const bool has_fenced_gpu_access = INTEL_INFO(eb->i915)->gen < 4; struct drm_i915_gem_object *obj; struct i915_vma *vma; - struct i915_address_space *vm; struct list_head ordered_vmas; struct list_head pinned_vmas; - bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; - vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; - INIT_LIST_HEAD(&ordered_vmas); INIT_LIST_HEAD(&pinned_vmas); - while (!list_empty(vmas)) { + while (!list_empty(&eb->vmas)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; - vma = list_first_entry(vmas, struct i915_vma, exec_list); + vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); obj = vma->obj; entry = vma->exec_entry; - if (ctx->flags & CONTEXT_NO_ZEROMAP) + if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; if (!has_fenced_gpu_access) @@ -865,8 +850,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; } - list_splice(&ordered_vmas, vmas); - list_splice(&pinned_vmas, vmas); + list_splice(&ordered_vmas, &eb->vmas); + list_splice(&pinned_vmas, &eb->vmas); /* Attempt to pin all of the buffers into the GTT. * This is done in 3 phases: @@ -885,24 +870,24 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, int ret = 0; /* Unbind any ill-fitting objects or pin. 
*/ - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { if (!drm_mm_node_allocated(&vma->node)) continue; if (eb_vma_misplaced(vma)) ret = i915_vma_unbind(vma); else - ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); + ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); if (ret) goto err; } /* Bind fresh objects */ - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { if (drm_mm_node_allocated(&vma->node)) continue; - ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); + ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); if (ret) goto err; } @@ -912,46 +897,37 @@ err: return ret; /* Decrement pin count for bound objects */ - list_for_each_entry(vma, vmas, exec_list) - i915_gem_execbuffer_unreserve_vma(vma); + list_for_each_entry(vma, &eb->vmas, exec_list) + eb_unreserve_vma(vma); - ret = i915_gem_evict_vm(vm, true); + ret = i915_gem_evict_vm(eb->vm, true); if (ret) return ret; } while (1); } static int -i915_gem_execbuffer_relocate_slow(struct drm_device *dev, - struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file, - struct intel_engine_cs *ring, - struct eb_vmas *eb, - struct drm_i915_gem_exec_object2 *exec, - struct intel_context *ctx) +eb_relocate_slow(struct i915_execbuffer *eb) { + const unsigned count = eb->args->buffer_count; + struct drm_device *dev = eb->i915->dev; struct drm_i915_gem_relocation_entry *reloc; - struct i915_address_space *vm; struct i915_vma *vma; - bool need_relocs; int *reloc_offset; int i, total, ret; - unsigned count = args->buffer_count; - - vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; /* We may process another execbuffer during the unlock... */ while (!list_empty(&eb->vmas)) { vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); - i915_gem_execbuffer_unreserve_vma(vma); + eb_unreserve_vma(vma); } mutex_unlock(&dev->struct_mutex); total = 0; for (i = 0; i < count; i++) - total += exec[i].relocation_count; + total += eb->exec[i].relocation_count; reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset)); reloc = drm_malloc_ab(total, sizeof(*reloc)); @@ -968,10 +944,10 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, u64 invalid_offset = (u64)-1; int j; - user_relocs = to_user_ptr(exec[i].relocs_ptr); + user_relocs = to_user_ptr(eb->exec[i].relocs_ptr); if (copy_from_user(reloc+total, user_relocs, - exec[i].relocation_count * sizeof(*reloc))) { + eb->exec[i].relocation_count * sizeof(*reloc))) { ret = -EFAULT; mutex_lock(&dev->struct_mutex); goto err; @@ -986,7 +962,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, * happened we would make the mistake of assuming that the * relocations were valid. 
*/ - for (j = 0; j < exec[i].relocation_count; j++) { + for (j = 0; j < eb->exec[i].relocation_count; j++) { if (__copy_to_user(&user_relocs[j].presumed_offset, &invalid_offset, sizeof(invalid_offset))) { @@ -997,7 +973,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, } reloc_offset[i] = total; - total += exec[i].relocation_count; + total += eb->exec[i].relocation_count; } ret = i915_mutex_lock_interruptible(dev); @@ -1008,19 +984,17 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, /* reacquire the objects */ eb_reset(eb); - ret = eb_lookup_vmas(eb, exec, args, vm, file); + ret = eb_lookup_vmas(eb); if (ret) goto err; - need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs); + ret = eb_reserve(eb); if (ret) goto err; list_for_each_entry(vma, &eb->vmas, exec_list) { - int offset = vma->exec_entry - exec; - ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb, - reloc + reloc_offset[offset]); + int offset = vma->exec_entry - eb->exec; + ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[offset]); if (ret) goto err; } @@ -1038,20 +1012,19 @@ err: } static int -i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, - struct list_head *vmas) +eb_move_to_gpu(struct i915_execbuffer *eb) { - const unsigned other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT; + const unsigned other_rings = (~intel_engine_flag(eb->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT; struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; int ret; - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; if (obj->flags & other_rings) { - ret = i915_gem_object_sync(obj, req); + ret = i915_gem_object_sync(obj, eb->request); if (ret) return ret; } @@ -1063,13 +1036,13 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, } if (flush_chipset) - i915_gem_chipset_flush(req->engine->dev); + i915_gem_chipset_flush(eb->i915->dev); /* Make sure (untracked) CPU relocs/parsing are flushed */ wmb(); /* Unconditionally invalidate gpu caches and TLBs. */ - return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); + return eb->engine->emit_flush(eb->request, I915_GEM_GPU_DOMAINS, 0); } static bool @@ -1168,24 +1141,26 @@ validate_exec_list(struct drm_device *dev, return 0; } -static struct intel_context * -i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, const u32 ctx_id) +static int eb_select_context(struct i915_execbuffer *eb) { struct intel_context *ctx; - struct i915_ctx_hang_stats *hs; + unsigned ctx_id; - ctx = i915_gem_context_get(file->driver_priv, ctx_id); - if (IS_ERR(ctx)) - return ctx; + ctx_id = i915_execbuffer2_get_context_id(*eb->args); + ctx = i915_gem_context_get(eb->file->driver_priv, ctx_id); + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); - hs = &ctx->hang_stats; - if (hs->banned) { + if (unlikely(ctx->hang_stats.banned)) { DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); - return ERR_PTR(-EIO); + return -EIO; } - return ctx; + eb->ctx = ctx; + i915_gem_context_reference(ctx); + eb->vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &eb->i915->gtt.base; + + return 0; } void i915_vma_move_to_active(struct i915_vma *vma, @@ -1225,12 +1200,11 @@ void i915_vma_move_to_active(struct i915_vma *vma, } static void -i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct drm_i915_gem_request *req) +eb_move_to_active(struct i915_execbuffer *eb) { struct i915_vma *vma; - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; @@ -1242,7 +1216,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, req, vma->exec_entry->flags); + i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); } } @@ -1274,28 +1248,24 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) } static struct i915_vma * -i915_gem_execbuffer_parse(struct intel_engine_cs *ring, - struct drm_i915_gem_exec_object2 *shadow_exec_entry, - struct drm_i915_gem_object *batch_obj, - struct eb_vmas *eb, - u32 batch_start_offset, - u32 batch_len, - bool is_master) +eb_parse(struct i915_execbuffer *eb, + struct drm_i915_gem_exec_object2 *shadow_exec_entry, + bool is_master) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; int ret; - shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool, - PAGE_ALIGN(batch_len)); + shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, + PAGE_ALIGN(eb->args->batch_len)); if (IS_ERR(shadow_batch_obj)) return ERR_CAST(shadow_batch_obj); - ret = i915_parse_cmds(ring, - batch_obj, + ret = i915_parse_cmds(eb->engine, + eb->batch_vma->obj, shadow_batch_obj, - batch_start_offset, - batch_len, + eb->args->batch_start_offset, + eb->args->batch_len, is_master); if (ret) { if (ret == -EACCES) /* unhandled chained batch */ @@ -1331,32 +1301,29 @@ add_to_client(struct drm_i915_gem_request *req, } static int -execbuf_submit(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) +execbuf_submit(struct i915_execbuffer *eb) { - struct intel_ring *ring = params->request->ring; - struct drm_i915_private *dev_priv = params->request->i915; - u64 exec_start, exec_len; + struct intel_ring *ring = eb->request->ring; + struct drm_i915_private *dev_priv = eb->i915; int instp_mode; u32 instp_mask; int ret; - ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); + ret = eb_move_to_gpu(eb); if (ret) return ret; - ret = i915_switch_context(params->request); + ret = i915_switch_context(eb->request); if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; + instp_mode = eb->args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->ring->id != RCS) { + if (instp_mode != 0 && eb->engine->id != RCS) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } @@ -1383,9 +1350,9 @@ execbuf_submit(struct i915_execbuffer_params *params, return -EINVAL; } - if (params->ring->id == RCS && + if (eb->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { - ret = intel_ring_begin(params->request, 4); + ret = 
intel_ring_begin(eb->request, 4); if (ret) return ret; @@ -1398,26 +1365,24 @@ execbuf_submit(struct i915_execbuffer_params *params, dev_priv->relative_constants_mode = instp_mode; } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(params->request); + if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { + ret = i915_reset_gen7_sol_offsets(eb->request); if (ret) return ret; } - exec_len = args->batch_len; - exec_start = params->batch_vma->node.start + - params->args_batch_start_offset; - - ret = params->ring->emit_bb_start(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = eb->engine->emit_bb_start(eb->request, + eb->batch_vma->node.start + + eb->batch_start_offset, + eb->args->batch_len, + eb->dispatch_flags); if (ret) return ret; - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); + trace_i915_gem_ring_dispatch(eb->request, eb->dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params->request); - add_to_client(params->request, params->file); + eb_move_to_active(eb); + add_to_client(eb->request, eb->file); return 0; } @@ -1426,19 +1391,18 @@ execbuf_submit(struct i915_execbuffer_params *params, * Find one BSD ring to dispatch the corresponding BSD command. * The Ring ID is returned. */ -static int gen8_dispatch_bsd_ring(struct drm_device *dev, - struct drm_file *file) +static struct intel_engine_cs * +gen8_dispatch_bsd_ring(struct drm_device *dev, + struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_file_private *file_priv = file->driver_priv; /* Check whether the file_priv is using one ring */ - if (file_priv->bsd_ring) - return file_priv->bsd_ring->id; - else { - /* If no, use the ping-pong mechanism to select one ring */ + if (file_priv->bsd_ring == NULL) { + struct drm_i915_private *dev_priv = dev->dev_private; int ring_id; + /* If not, use the ping-pong mechanism to select one ring */ mutex_lock(&dev->struct_mutex); if (dev_priv->mm.bsd_ring_dispatch_index == 0) { ring_id = VCS; @@ -1449,28 +1413,20 @@ static int gen8_dispatch_bsd_ring(struct drm_device *dev, } file_priv->bsd_ring = &dev_priv->ring[ring_id]; mutex_unlock(&dev->struct_mutex); - return ring_id; } + + return file_priv->bsd_ring; } static int -i915_gem_do_execbuffer(struct drm_device *dev, void *data, +i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct drm_i915_gem_exec_object2 *exec) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct eb_vmas *eb; + struct i915_execbuffer eb; struct drm_i915_gem_exec_object2 shadow_exec_entry; - struct intel_engine_cs *ring; - struct intel_context *ctx; - struct i915_address_space *vm; - struct i915_execbuffer_params params_master; /* XXX: will be removed later */ - struct i915_execbuffer_params *params = ¶ms_master; - const u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u32 dispatch_flags; int ret; - bool need_relocs; if (!i915_gem_check_execbuffer(args)) return -EINVAL; @@ -1479,15 +1435,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) return ret; - dispatch_flags = 0; + eb.dispatch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (!file->is_master || !capable(CAP_SYS_ADMIN)) return -EPERM; - dispatch_flags |= I915_DISPATCH_SECURE; + eb.dispatch_flags |= I915_DISPATCH_SECURE; } if (args->flags & I915_EXEC_IS_PINNED) - dispatch_flags |= I915_DISPATCH_PINNED; + eb.dispatch_flags |= I915_DISPATCH_PINNED; if ((args->flags & I915_EXEC_RING_MASK) 
> LAST_USER_RING) { DRM_DEBUG("execbuf with unknown ring: %d\n", @@ -1502,22 +1458,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + eb.i915 = to_i915(dev); + eb.file = file; + eb.args = args; + eb.exec = exec; + eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; + reloc_cache_init(&eb.reloc_cache, eb.i915); + if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) - ring = &dev_priv->ring[RCS]; + eb.engine = &eb.i915->ring[RCS]; else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) { if (HAS_BSD2(dev)) { - int ring_id; - switch (args->flags & I915_EXEC_BSD_MASK) { case I915_EXEC_BSD_DEFAULT: - ring_id = gen8_dispatch_bsd_ring(dev, file); - ring = &dev_priv->ring[ring_id]; + eb.engine = gen8_dispatch_bsd_ring(dev, file); break; case I915_EXEC_BSD_RING1: - ring = &dev_priv->ring[VCS]; + eb.engine = &eb.i915->ring[VCS]; break; case I915_EXEC_BSD_RING2: - ring = &dev_priv->ring[VCS2]; + eb.engine = &eb.i915->ring[VCS2]; break; default: DRM_DEBUG("execbuf with unknown bsd ring: %d\n", @@ -1525,33 +1485,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } } else - ring = &dev_priv->ring[VCS]; + eb.engine = &eb.i915->ring[VCS]; } else - ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; + eb.engine = &eb.i915->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; - if (!intel_engine_initialized(ring)) { + if (!intel_engine_initialized(eb.engine)) { DRM_DEBUG("execbuf with invalid ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } - if (args->buffer_count < 1) { - DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); - return -EINVAL; - } - if (args->flags & I915_EXEC_RESOURCE_STREAMER) { - if (!HAS_RESOURCE_STREAMER(dev)) { + if (!HAS_RESOURCE_STREAMER(eb.i915)) { DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); return -EINVAL; } - if (ring->id != RCS) { + if (eb.engine->id != RCS) { DRM_DEBUG("RS is not available on %s\n", - ring->name); + eb.engine->name); return -EINVAL; } - dispatch_flags |= I915_DISPATCH_RS; + eb.dispatch_flags |= I915_DISPATCH_RS; } /* Take a local wakeref for preparing to dispatch the execbuf as @@ -1560,57 +1515,44 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - intel_runtime_pm_get(dev_priv); + intel_runtime_pm_get(eb.i915); ret = i915_mutex_lock_interruptible(dev); if (ret) goto pre_mutex_err; - ctx = i915_gem_validate_context(dev, file, ring, ctx_id); - if (IS_ERR(ctx)) { + ret = eb_select_context(&eb); + if (ret) { mutex_unlock(&dev->struct_mutex); - ret = PTR_ERR(ctx); goto pre_mutex_err; } - i915_gem_context_reference(ctx); - - if (ctx->ppgtt) - vm = &ctx->ppgtt->base; - else - vm = &dev_priv->gtt.base; - - memset(¶ms_master, 0x00, sizeof(params_master)); - - eb = eb_create(dev_priv, args); - if (eb == NULL) { - i915_gem_context_unreference(ctx); + if (eb_create(&eb)) { + i915_gem_context_unreference(eb.ctx); mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; goto pre_mutex_err; } /* Look up object handles */ - ret = eb_lookup_vmas(eb, exec, args, vm, file); + ret = eb_lookup_vmas(&eb); if (ret) goto err; /* take note of the batch buffer before we might reorder the lists */ - params->batch_vma = eb_get_batch(eb); + eb.batch_vma = eb_get_batch(&eb); /* Move the objects en-masse into the GTT, evicting if necessary. 
*/ - need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs); + ret = eb_reserve(&eb); if (ret) goto err; /* The objects are in their final locations, apply the relocations. */ - if (need_relocs) - ret = i915_gem_execbuffer_relocate(eb); + if (eb.need_relocs) + ret = eb_relocate(&eb); if (ret) { if (ret == -EFAULT) { - ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring, - eb, exec, ctx); + ret = eb_relocate_slow(&eb); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } if (ret) @@ -1618,22 +1560,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (params->batch_vma->obj->base.pending_write_domain) { + if (eb.batch_vma->obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } - params->args_batch_start_offset = args->batch_start_offset; - if (intel_engine_needs_cmd_parser(ring) && args->batch_len) { + eb.batch_start_offset = args->batch_start_offset; + if (intel_engine_needs_cmd_parser(eb.engine) && args->batch_len) { struct i915_vma *vma; - vma = i915_gem_execbuffer_parse(ring, &shadow_exec_entry, - params->batch_vma->obj, - eb, - args->batch_start_offset, - args->batch_len, - file->is_master); + vma = eb_parse(&eb, &shadow_exec_entry, file->is_master); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -1649,19 +1586,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * specifically don't want that set on batches the * command parser has accepted. */ - dispatch_flags |= I915_DISPATCH_SECURE; - params->args_batch_start_offset = 0; - params->batch_vma = vma; + eb.dispatch_flags |= I915_DISPATCH_SECURE; + eb.batch_start_offset = 0; + eb.batch_vma = vma; } } - params->batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + eb.batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (dispatch_flags & I915_DISPATCH_SECURE) { - struct drm_i915_gem_object *obj = params->batch_vma->obj; + if (eb.dispatch_flags & I915_DISPATCH_SECURE) { + struct drm_i915_gem_object *obj = eb.batch_vma->obj; struct i915_vma *vma; /* @@ -1680,13 +1617,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - params->batch_vma = vma; + eb.batch_vma = vma; } /* Allocate a request for this batch buffer nice and early. */ - params->request = i915_gem_request_alloc(ring, ctx); - if (IS_ERR(params->request)) { - ret = PTR_ERR(params->request); + eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); + if (IS_ERR(eb.request)) { + ret = PTR_ERR(eb.request); goto err_batch_unpin; } @@ -1696,22 +1633,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. */ - params->request->batch = params->batch_vma; + eb.request->batch = eb.batch_vma; - /* - * Save assorted stuff away to pass through to *_submission(). - * NB: This data should be 'persistent' and not local as it will - * kept around beyond the duration of the IOCTL once the GPU - * scheduler arrives. 
- */ - params->dev = dev; - params->file = file; - params->ring = ring; - params->dispatch_flags = dispatch_flags; - params->ctx = ctx; - - ret = execbuf_submit(params, args, &eb->vmas); - __i915_add_request(params->request, ret == 0); + ret = execbuf_submit(&eb); + __i915_add_request(eb.request, ret == 0); err_batch_unpin: /* @@ -1720,19 +1645,17 @@ err_batch_unpin: * needs to be adjusted to also track the ggtt batch vma properly as * active. */ - if (dispatch_flags & I915_DISPATCH_SECURE) - i915_vma_unpin(params->batch_vma); + if (eb.dispatch_flags & I915_DISPATCH_SECURE) + i915_vma_unpin(eb.batch_vma); err: /* the request owns the ref now */ - i915_gem_context_unreference(ctx); - eb_destroy(eb); - + eb_destroy(&eb); mutex_unlock(&dev->struct_mutex); pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(eb.i915); return ret; } @@ -1799,7 +1722,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); + ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); if (!ret) { struct drm_i915_gem_exec_object __user *user_exec_list = to_user_ptr(args->buffers_ptr); @@ -1863,7 +1786,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EFAULT; } - ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); + ret = i915_gem_do_execbuffer(dev, file, args, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ struct drm_i915_gem_exec_object2 __user *user_exec_list = -- 2.7.0.rc3
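For readers skimming the diff, the minimal, self-contained C sketch below models the pattern the patch applies: the lookup state that lived in eb_vmas and the submission parameters that lived in i915_execbuffer_params are folded into one stack-allocated struct that is initialised once and handed to every stage. All names in the sketch (execbuffer_state, fake_engine, reserve_buffers, submit_batch) are illustrative stand-ins rather than the real i915 symbols, and the fields shown are only a small subset of what struct i915_execbuffer actually carries.

/* Sketch of the consolidation described above; stand-in names, not i915 code. */
#include <stdbool.h>
#include <stdio.h>

struct fake_engine {
	const char *name;
};

/* One state block for the whole execbuffer call, playing the role that
 * used to be split between the lookup table (eb_vmas) and the parameter
 * block (i915_execbuffer_params). */
struct execbuffer_state {
	struct fake_engine *engine;      /* was params->ring */
	unsigned int dispatch_flags;     /* was params->dispatch_flags */
	unsigned int batch_start_offset; /* was params->args_batch_start_offset */
	unsigned int buffer_count;       /* was re-read from args in each helper */
	bool need_relocs;                /* was a local in do_execbuffer */
};

/* Each stage takes the one state struct instead of a handful of arguments. */
static int reserve_buffers(struct execbuffer_state *eb)
{
	printf("reserving %u buffers on %s\n",
	       eb->buffer_count, eb->engine->name);
	return 0;
}

static int submit_batch(struct execbuffer_state *eb)
{
	printf("submitting batch at offset %u, flags %#x\n",
	       eb->batch_start_offset, eb->dispatch_flags);
	return 0;
}

int main(void)
{
	struct fake_engine rcs = { .name = "render" };
	struct execbuffer_state eb = {
		.engine = &rcs,
		.buffer_count = 2,
		.need_relocs = true,
	};

	/* Initialise once, then thread the same struct through every helper. */
	if (reserve_buffers(&eb))
		return 1;
	return submit_batch(&eb);
}

The payoff mirrored here is the same one visible in the patch: helpers such as eb_reserve() and execbuf_submit() shrink to a single argument, and per-call state (reloc_cache, need_relocs, dispatch_flags, the batch vma) has one obvious home for its lifetime.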