Serialising insertion into each of the global GTT and ppGTT accounts for a large chunk of the current struct_mutex serialisation requireemnts. (Note that it is not just the drm_mm / gtt management itself being serialised, but the pin count and various flags.) Previously, the main blocker for replacing this mutex was the reset handing, but with the advent of "lockless" resets, we can freely take the vm->mutex and block waiting for the GPU (without fear of deadlock if the GPU hangs). We also proscribe allocations underneath vm->mutex, allowing us to take the mutex inside the shrinker, avoiding the recursive struct_mutex we previously used. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- .../gpu/drm/i915/gem/i915_gem_client_blt.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 33 ++--- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 5 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 27 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.c | 11 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 93 +------------ drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 5 +- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 60 +++++---- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 28 +--- .../drm/i915/gem/selftests/i915_gem_mman.c | 2 + drivers/gpu/drm/i915/gt/intel_context.c | 5 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/gvt/aperture_gm.c | 22 ++-- drivers/gpu/drm/i915/i915_debugfs.c | 10 +- drivers/gpu/drm/i915/i915_gem.c | 123 +++++++++++------- drivers/gpu/drm/i915/i915_gem_evict.c | 28 +--- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 71 +++++----- drivers/gpu/drm/i915/i915_gem_gtt.c | 85 ++---------- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 - drivers/gpu/drm/i915/i915_perf.c | 20 +-- drivers/gpu/drm/i915/i915_vma.c | 77 ++++++----- drivers/gpu/drm/i915/i915_vma.h | 61 +++------ drivers/gpu/drm/i915/intel_display.c | 32 +---- drivers/gpu/drm/i915/intel_fbdev.c | 8 +- drivers/gpu/drm/i915/intel_guc.c | 1 + drivers/gpu/drm/i915/intel_overlay.c | 12 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 + 29 files changed, 301 insertions(+), 539 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index f253ec5765ad..453f50182f55 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -154,7 +154,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->gem_context->i915; struct drm_i915_gem_object *obj = w->sleeve->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; @@ -170,11 +169,9 @@ static void clear_pages_worker(struct work_struct *work) obj->cache_dirty = false; } - /* XXX: we need to kill this */ - mutex_lock(&i915->drm.struct_mutex); err = i915_vma_pin(vma, 0, 0, PIN_USER); if (unlikely(err)) - goto out_unlock; + goto out_signal; rq = i915_request_create(w->ce); if (IS_ERR(rq)) { @@ -210,8 +207,6 @@ static void clear_pages_worker(struct work_struct *work) i915_request_add(rq); out_unpin: i915_vma_unpin(vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); out_signal: if (unlikely(err)) { dma_fence_set_error(&w->dma, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 7e6ed767348c..56aade8fb702 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -26,7 +26,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_global)) + if (!atomic_read(&obj->pin_global)) return; i915_gem_object_lock(obj); @@ -369,16 +369,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto out; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - goto out; - ret = i915_gem_object_lock_interruptible(obj); if (ret == 0) { ret = i915_gem_object_set_cache_level(obj, level); i915_gem_object_unlock(obj); } - mutex_unlock(&i915->drm.struct_mutex); out: i915_gem_object_put(obj); @@ -405,7 +400,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, /* Mark the global pin early so that we account for the * display coherency whilst setting up the cache domains. */ - obj->pin_global++; + atomic_inc(&obj->pin_global); /* The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is @@ -455,25 +450,27 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return vma; err_unpin_global: - obj->pin_global--; + atomic_dec(&obj->pin_global); return vma; } static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - mutex_lock(&i915->ggtt.vm.mutex); - for_each_ggtt_vma(vma, obj) { - if (!drm_mm_node_allocated(&vma->node)) - continue; + if (mutex_trylock(&i915->ggtt.vm.mutex)) { + struct i915_vma *vma; - list_move_tail(&vma->vm_link, &vma->vm->bound_list); + for_each_ggtt_vma(vma, obj) { + if (!drm_mm_node_allocated(&vma->node)) + continue; + + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + } + mutex_unlock(&i915->ggtt.vm.mutex); } - mutex_unlock(&i915->ggtt.vm.mutex); if (i915_gem_object_is_shrinkable(obj)) { unsigned long flags; @@ -492,12 +489,10 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; - assert_object_held(obj); - - if (WARN_ON(obj->pin_global == 0)) + if (GEM_WARN_ON(!atomic_read(&obj->pin_global))) return; - if (--obj->pin_global == 0) + if (atomic_dec_and_test(&obj->pin_global)) vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index ccb089420830..cdc1bd285c62 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -547,8 +547,11 @@ eb_add_vma(struct i915_execbuffer *eb, eb_unreserve_vma(vma, vma->exec_flags); list_add_tail(&vma->exec_link, &eb->unbound); - if (drm_mm_node_allocated(&vma->node)) + if (drm_mm_node_allocated(&vma->node)) { + mutex_lock(&vma->vm->mutex); err = i915_vma_unbind(vma); + mutex_unlock(&vma->vm->mutex); + } if (unlikely(err)) vma->exec_flags = NULL; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 49cf9ad97bfc..9fa0569afb50 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -248,16 +248,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) goto err_rpm; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { - ret = -EFAULT; - goto err_unlock; - } - /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | @@ -287,13 +277,19 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) } if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unlock; + goto err_reset; } - ret = i915_vma_pin_fence(vma); + assert_rpm_wakelock_held(i915); + + ret = mutex_lock_interruptible(&vma->vm->mutex); if (ret) goto err_unpin; + ret = __i915_vma_pin_fence(vma); + if (ret) + goto err_unlock; + /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), @@ -304,7 +300,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) goto err_fence; /* Mark as being mmapped into userspace for later revocation */ - assert_rpm_wakelock_held(i915); if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) list_add(&obj->userfault_link, &i915->ggtt.userfault_list); if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) @@ -316,10 +311,10 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) err_fence: i915_vma_unpin_fence(vma); +err_unlock: + mutex_unlock(&vma->vm->mutex); err_unpin: __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); err_reset: i915_reset_unlock(i915, srcu); err_rpm: @@ -405,7 +400,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) * requirement that operations to the GGTT be made holding the RPM * wakeref. */ - lockdep_assert_held(&i915->drm.struct_mutex); + lockdep_assert_held(&i915->ggtt.vm.mutex); wakeref = intel_runtime_pm_get(i915); if (!obj->userfault_count) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index ee69cd7948c0..a62ba5de56b4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -174,12 +174,15 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, trace_i915_gem_object_destroy(obj); - mutex_lock(&i915->drm.struct_mutex); - list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { + struct i915_address_space *vm = vma->vm; + GEM_BUG_ON(i915_vma_is_active(vma)); - vma->flags &= ~I915_VMA_PIN_MASK; + atomic_set(&vma->pin_count, 0); + + mutex_lock(&vm->mutex); i915_vma_destroy(vma); + mutex_unlock(&vm->mutex); } GEM_BUG_ON(!list_empty(&obj->vma.list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); @@ -200,8 +203,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } - mutex_unlock(&i915->drm.struct_mutex); - GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(!list_empty(&obj->lut_list)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7ea8013d108f..0fc54924b33a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -403,7 +403,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_global; /* currently in use by HW, keep flushed */ + /* Currently in use by HW? Keep flushed. */ + return atomic_read(&obj->pin_global); } static inline void __start_cpu_write(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 8f61d7a93078..f792953b8a71 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -159,7 +159,7 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; /** Count of how many global VMA are currently pinned for use by HW */ - unsigned int pin_global; + atomic_t pin_global; struct { struct mutex lock; /* protects the pages and their use */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 48451110e736..4eeef6225a71 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -16,40 +16,6 @@ #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *i915, - unsigned int flags, - bool *unlock) -{ - struct mutex *m = &i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(m)) { - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - - case MUTEX_TRYLOCK_FAILED: - *unlock = false; - if (flags & I915_SHRINK_ACTIVE && - mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) - *unlock = true; - return *unlock; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - } - - BUG(); -} - -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) -{ - if (!unlock) - return; - - mutex_unlock(&i915->drm.struct_mutex); -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -78,7 +44,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) * To simplify the scan, and to avoid walking the list of vma under the * object, we just check the count of its permanently pinned. */ - if (READ_ONCE(obj->pin_global)) + if (atomic_read(&obj->pin_global)) return false; /* We can only return physical pages to the system if we can either @@ -154,10 +120,6 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, flags, &unlock)) - return 0; /* * When shrinking the active list, also consider active contexts. @@ -174,7 +136,6 @@ i915_gem_shrink(struct drm_i915_private *i915, MAX_SCHEDULE_TIMEOUT); trace_i915_gem_shrink(i915, target, flags); - i915_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -216,13 +177,6 @@ i915_gem_shrink(struct drm_i915_private *i915, INIT_LIST_HEAD(&still_in_list); - /* - * We serialize our access to unreferenced objects through - * the use of the struct_mutex. While the objects are not - * yet freed (due to RCU then a workqueue) we still want - * to be able to shrink their pages, so they remain on - * the unbound/bound list until actually freed. - */ spin_lock_irqsave(&i915->mm.obj_lock, flags); while (count < target && (obj = list_first_entry_or_null(phase->list, @@ -270,10 +224,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (flags & I915_SHRINK_BOUND) intel_runtime_pm_put(i915, wakeref); - i915_retire_requests(i915); - - shrinker_unlock(i915, unlock); - if (nr_scanned) *nr_scanned += scanned; return count; @@ -343,13 +293,9 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; - bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, @@ -370,8 +316,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) } } - shrinker_unlock(i915, unlock); - return sc->nr_scanned ? freed : SHRINK_STOP; } @@ -423,16 +367,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr struct i915_vma *vma, *next; unsigned long freed_pages = 0; intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; /* Force everything onto the inactive lists */ if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT)) - goto out; + return NOTIFY_DONE; with_intel_runtime_pm(i915, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, @@ -449,16 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; - mutex_unlock(&i915->ggtt.vm.mutex); if (i915_vma_unbind(vma) == 0) freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); } mutex_unlock(&i915->ggtt.vm.mutex); -out: - shrinker_unlock(i915, unlock); - *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } @@ -500,37 +435,13 @@ void i915_gem_shrinker_unregister(struct drm_i915_private *i915) void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, struct mutex *mutex) { - bool unlock = false; - if (!IS_ENABLED(CONFIG_LOCKDEP)) return; - if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) { - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_NORMAL, 0, _RET_IP_); - unlock = true; - } - fs_reclaim_acquire(GFP_KERNEL); - /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. - */ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); mutex_release(&mutex->dep_map, 0, _RET_IP_); - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); - fs_reclaim_release(GFP_KERNEL); - - if (unlock) - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 65ba85655582..2990ef3604aa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -618,8 +618,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (!drm_mm_initialized(&dev_priv->mm.stolen)) return NULL; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, >t_offset, &size); @@ -671,10 +669,12 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. */ + mutex_lock(&ggtt->vm.mutex); ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { + mutex_unlock(&ggtt->vm.mutex); DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); goto err_pages; } @@ -685,7 +685,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); - mutex_lock(&ggtt->vm.mutex); list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ca0c2f451742..f919dbb36edb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -212,15 +212,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (i915_gem_object_is_framebuffer(obj)) + i915_gem_object_lock(obj); + if (i915_gem_object_is_framebuffer(obj)) { + i915_gem_object_unlock(obj); return -EBUSY; + } - /* We need to rebind the object if its current allocation + /* + * We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but * need to ensure that any fence register is updated before @@ -233,17 +236,37 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. */ - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) + err = mutex_lock_interruptible(&i915->ggtt.vm.mutex); + if (err) { + i915_gem_object_unlock(obj); return err; + } - i915_gem_object_lock(obj); - if (i915_gem_object_is_framebuffer(obj)) { + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) { + mutex_unlock(&i915->ggtt.vm.mutex); i915_gem_object_unlock(obj); - return -EBUSY; + return err; } - /* If the memory has unknown (i.e. varying) swizzling, we pin the + for_each_ggtt_vma(vma, obj) { + vma->fence_size = + i915_gem_fence_size(i915, vma->size, tiling, stride); + vma->fence_alignment = + i915_gem_fence_alignment(i915, + vma->size, tiling, stride); + + if (vma->fence) + vma->fence->dirty = true; + } + + /* Force the fence to be reacquired for GTT access */ + i915_gem_object_release_mmap(obj); + + mutex_unlock(&i915->ggtt.vm.mutex); + + /* + * If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. */ @@ -264,23 +287,9 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } mutex_unlock(&obj->mm.lock); - for_each_ggtt_vma(vma, obj) { - vma->fence_size = - i915_gem_fence_size(i915, vma->size, tiling, stride); - vma->fence_alignment = - i915_gem_fence_alignment(i915, - vma->size, tiling, stride); - - if (vma->fence) - vma->fence->dirty = true; - } - obj->tiling_and_stride = tiling | stride; i915_gem_object_unlock(obj); - /* Force the fence to be reacquired for GTT access */ - i915_gem_object_release_mmap(obj); - /* Try to preallocate memory required to save swizzling on put-pages */ if (i915_gem_object_needs_bit17_swizzle(obj)) { if (!obj->bit_17) { @@ -364,12 +373,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); /* We have to maintain this existing ABI... */ args->stride = i915_gem_object_get_stride(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 528b61678334..f093deaeb5c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -93,7 +93,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); struct interval_tree_node *it; - struct mutex *unlock = NULL; unsigned long end; int ret = 0; @@ -130,32 +129,12 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - ret = i915_gem_object_unbind(obj); if (ret == 0) ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); i915_gem_object_put(obj); if (ret) - goto unlock; + return ret; spin_lock(&mn->lock); @@ -168,12 +147,7 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - return ret; - } static const struct mmu_notifier_ops i915_gem_userptr_notifier = { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b0ba1680ede6..fbbb89051c21 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -168,7 +168,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (err) return err; + mutex_lock(&to_i915(obj->base.dev)->ggtt.vm.mutex); i915_vma_destroy(vma); + mutex_unlock(&to_i915(obj->base.dev)->ggtt.vm.mutex); } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a32698f7645f..e9b86787e8ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -110,7 +110,7 @@ static int __context_pin_state(struct i915_vma *vma) * And mark it as a globally pinned object to let the shrinker know * it cannot reclaim the object until we release it. */ - vma->obj->pin_global++; + atomic_inc(&vma->obj->pin_global); vma->obj->mm.dirty = true; return 0; @@ -118,7 +118,8 @@ static int __context_pin_state(struct i915_vma *vma) static void __context_unpin_state(struct i915_vma *vma) { - vma->obj->pin_global--; + GEM_BUG_ON(!atomic_read(&vma->obj->pin_global)); + atomic_dec(&vma->obj->pin_global); __i915_vma_unpin(vma); } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index f4750207c393..2b4485d918f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1183,7 +1183,7 @@ int intel_ring_pin(struct intel_ring *ring) goto unpin_ring; } - vma->obj->pin_global++; + atomic_read(&vma->obj->pin_global); ring->vaddr = addr; return 0; @@ -1219,7 +1219,7 @@ void intel_ring_unpin(struct intel_ring *ring) i915_gem_object_unpin_map(ring->vma->obj); ring->vaddr = NULL; - ring->vma->obj->pin_global--; + atomic_dec(&ring->vma->obj->pin_global); i915_vma_unpin(ring->vma); i915_timeline_unpin(ring->timeline); diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 4098902bfaeb..a2d7a1df820f 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) flags = PIN_MAPPABLE; } - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); mmio_hw_access_pre(dev_priv); ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); mmio_hw_access_post(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); if (ret) gvt_err("fail to alloc %s gm space from host\n", high_gm ? "high" : "low"); @@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu) return 0; out_free_aperture: - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); return ret; } @@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); drm_mm_remove_node(&vgpu->gm.high_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); } /** @@ -172,14 +172,14 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu) intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); _clear_vgpu_fence(vgpu); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = vgpu->fence.regs[i]; i915_unreserve_fence(reg); vgpu->fence.regs[i] = NULL; } - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); intel_runtime_pm_put_unchecked(dev_priv); } @@ -194,7 +194,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) intel_runtime_pm_get(dev_priv); /* Request fences from host */ - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = i915_reserve_fence(dev_priv); @@ -206,7 +206,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) _clear_vgpu_fence(vgpu); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); intel_runtime_pm_put_unchecked(dev_priv); return 0; out_free_fence: @@ -219,7 +219,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) i915_unreserve_fence(reg); vgpu->fence.regs[i] = NULL; } - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); intel_runtime_pm_put_unchecked(dev_priv); return -ENOSPC; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e2d6b8dff4ab..3e78b5d0d5a6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -76,7 +76,7 @@ static int i915_capabilities(struct seq_file *m, void *data) static char get_pin_flag(struct drm_i915_gem_object *obj) { - return obj->pin_global ? 'p' : ' '; + return atomic_read(&obj->pin_global) ? 'p' : ' '; } static char get_tiling_flag(struct drm_i915_gem_object *obj) @@ -218,7 +218,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (pinned x %d)", pin_count); if (obj->stolen) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); - if (obj->pin_global) + if (atomic_read(&obj->pin_global)) seq_printf(m, " (global)"); engine = i915_gem_object_last_write_engine(obj); @@ -1602,11 +1602,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) struct drm_device *dev = &dev_priv->drm; struct intel_framebuffer *fbdev_fb = NULL; struct drm_framebuffer *drm_fb; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; #ifdef CONFIG_DRM_FBDEV_EMULATION if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) { @@ -1641,7 +1636,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) seq_putc(m, '\n'); } mutex_unlock(&dev->mode_config.fb_lock); - mutex_unlock(&dev->struct_mutex); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d74fcddd863e..fde995b05c81 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -60,20 +60,31 @@ #include "intel_pm.h" static int -insert_mappable_node(struct i915_ggtt *ggtt, - struct drm_mm_node *node, u32 size) +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { + int ret; + memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, - size, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); + + ret = mutex_lock_interruptible(&ggtt->vm.mutex); + if (ret) + return ret; + + ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, node, + size, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); + + return ret; } static void -remove_mappable_node(struct drm_mm_node *node) +remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node) { + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(node); + mutex_unlock(&ggtt->vm.mutex); } int @@ -84,8 +95,11 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; u64 pinned; + int ret; - mutex_lock(&ggtt->vm.mutex); + ret = mutex_lock_interruptible(&ggtt->vm.mutex); + if (ret) + return ret; pinned = ggtt->vm.reserved; list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) @@ -106,8 +120,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) LIST_HEAD(still_in_list); int ret = 0; - lockdep_assert_held(&obj->base.dev->struct_mutex); - spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, @@ -115,7 +127,11 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) list_move_tail(&vma->obj_link, &still_in_list); spin_unlock(&obj->vma.lock); - ret = i915_vma_unbind(vma); + ret = mutex_lock_interruptible(&vma->vm->mutex); + if (!ret) { + ret = i915_vma_unbind(vma); + mutex_unlock(&vma->vm->mutex); + } spin_lock(&obj->vma.lock); } @@ -371,10 +387,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, u64 remain, offset; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - wakeref = intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | @@ -383,7 +395,12 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; - ret = i915_vma_put_fence(vma); + + ret = mutex_lock_interruptible(&vma->vm->mutex); + if (!ret) { + ret = i915_vma_put_fence(vma); + mutex_unlock(&vma->vm->mutex); + } if (ret) { i915_vma_unpin(vma); vma = ERR_PTR(ret); @@ -392,12 +409,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; + goto out_rpm; GEM_BUG_ON(!node.allocated); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -453,17 +468,15 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); if (node.allocated) { wmb(); ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -570,10 +583,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, void __user *user_data; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - if (i915_gem_object_has_struct_page(obj)) { /* * Avoid waking the device up if we can fallback, as @@ -583,10 +592,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * using the cache bypass of indirect GGTT access. */ wakeref = intel_runtime_pm_get_if_in_use(i915); - if (!wakeref) { - ret = -EFAULT; - goto out_unlock; - } + if (!wakeref) + return -EFAULT; } else { /* No backing pages, no fallback, we must force GGTT access */ wakeref = intel_runtime_pm_get(i915); @@ -599,7 +606,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; - ret = i915_vma_put_fence(vma); + + ret = mutex_lock_interruptible(&vma->vm->mutex); + if (!ret) { + ret = i915_vma_put_fence(vma); + mutex_unlock(&vma->vm->mutex); + } if (ret) { i915_vma_unpin(vma); vma = ERR_PTR(ret); @@ -612,8 +624,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, GEM_BUG_ON(!node.allocated); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -676,18 +686,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); if (node.allocated) { wmb(); ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } out_rpm: intel_runtime_pm_put(i915, wakeref); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -1032,8 +1039,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); - if (flags & PIN_MAPPABLE && (!view || view->type == I915_GGTT_VIEW_NORMAL)) { /* If the required space is larger than the available @@ -1070,14 +1075,28 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return vma; + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ERR_PTR(ret); + + ret = mutex_lock_interruptible(&vm->mutex); + if (ret) { + vma = ERR_PTR(ret); + goto unpin; + } + if (i915_vma_misplaced(vma, size, alignment, flags)) { if (flags & PIN_NONBLOCK) { - if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) - return ERR_PTR(-ENOSPC); + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) { + vma = ERR_PTR(-ENOSPC); + goto unlock; + } if (flags & PIN_MAPPABLE && - vma->fence_size > dev_priv->ggtt.mappable_end / 2) - return ERR_PTR(-ENOSPC); + vma->fence_size > dev_priv->ggtt.mappable_end / 2) { + vma = ERR_PTR(-ENOSPC); + goto unlock; + } } WARN(i915_vma_is_pinned(vma), @@ -1088,14 +1107,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, !!(flags & PIN_MAPPABLE), i915_vma_is_map_and_fenceable(vma)); ret = i915_vma_unbind(vma); - if (ret) - return ERR_PTR(ret); + if (ret) { + vma = ERR_PTR(ret); + goto unlock; + } } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = __i915_vma_do_pin(vma, size, alignment, flags | PIN_GLOBAL); if (ret) - return ERR_PTR(ret); + vma = ERR_PTR(ret); +unlock: + mutex_unlock(&vm->mutex); +unpin: + i915_gem_object_unpin_pages(obj); return vma; } @@ -1394,7 +1419,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) * from the GTT to prevent such accidents and reclaim the * space. */ + mutex_lock(&state->vm->mutex); err = i915_vma_unbind(state); + mutex_unlock(&state->vm->mutex); if (err) goto err_active; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index a5783c4cb98b..0202f58d8bb5 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -48,8 +48,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * bound by their active reference. */ return i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, + I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } @@ -108,7 +107,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct i915_vma *active; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* @@ -131,15 +130,6 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, cache_level, start, end, mode); - /* - * Retire before we search the active list. Although we have - * reasonable accuracy in our retirement lists, we may have - * a stray pin (preventing eviction) that can only be resolved by - * retiring. - */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(dev_priv); - search_again: active = NULL; INIT_LIST_HEAD(&eviction_list); @@ -273,20 +263,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, bool check_color; int ret = 0; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); trace_i915_gem_evict_node(vm, target, flags); - /* Retire before we search the active list. Although we have - * reasonable accuracy in our retirement lists, we may have - * a stray pin (preventing eviction) that can only be resolved by - * retiring. - */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(vm->i915); - check_color = vm->mm.color_adjust; if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) */ @@ -384,7 +366,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) struct i915_vma *vma, *next; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict_vm(vm); /* Switch back to the default context in order to unpin @@ -399,7 +381,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); - mutex_lock(&vm->mutex); list_for_each_entry(vma, &vm->bound_list, vm_link) { if (i915_vma_is_pinned(vma)) continue; @@ -407,7 +388,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm) __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); } - mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 543c5a47cc79..57f1ca4cc9fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -339,26 +339,8 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) return ERR_PTR(-EDEADLK); } -/** - * i915_vma_pin_fence - set up fencing for a vma - * @vma: vma to map through a fence reg - * - * When mapping objects through the GTT, userspace wants to be able to write - * to them without having to worry about swizzling if the object is tiled. - * This function walks the fence regs looking for a free one for @obj, - * stealing one if it can't find any. - * - * It then sets up the reg based on the object's properties: address, pitch - * and tiling format. - * - * For an untiled surface, this removes any existing fence. - * - * Returns: - * - * 0 on success, negative error code on failure. - */ int -i915_vma_pin_fence(struct i915_vma *vma) +__i915_vma_pin_fence(struct i915_vma *vma) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); struct i915_fence_reg *fence; @@ -370,12 +352,9 @@ i915_vma_pin_fence(struct i915_vma *vma) * must keep the device awake whilst using the fence. */ assert_rpm_wakelock_held(ggtt->vm.i915); + lockdep_assert_held(&ggtt->vm.mutex); GEM_BUG_ON(!i915_vma_is_pinned(vma)); - err = mutex_lock_interruptible(&ggtt->vm.mutex); - if (err) - return err; - /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; @@ -383,19 +362,17 @@ i915_vma_pin_fence(struct i915_vma *vma) atomic_inc(&fence->pin_count); if (!fence->dirty) { list_move_tail(&fence->link, &ggtt->fence_list); - goto unlock; + return 0; } } else if (set) { fence = fence_find(vma->vm->i915); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto unlock; - } + if (IS_ERR(fence)) + return PTR_ERR(fence); GEM_BUG_ON(atomic_read(&fence->pin_count)); atomic_inc(&fence->pin_count); } else { - goto unlock; + return 0; } err = fence_update(fence, set); @@ -406,12 +383,44 @@ i915_vma_pin_fence(struct i915_vma *vma) GEM_BUG_ON(vma->fence != (set ? fence : NULL)); if (set) - goto unlock; + return 0; out_unpin: atomic_dec(&fence->pin_count); -unlock: + return err; +} + +/** + * i915_vma_pin_fence - set up fencing for a vma + * @vma: vma to map through a fence reg + * + * When mapping objects through the GTT, userspace wants to be able to write + * to them without having to worry about swizzling if the object is tiled. + * This function walks the fence regs looking for a free one for @obj, + * stealing one if it can't find any. + * + * It then sets up the reg based on the object's properties: address, pitch + * and tiling format. + * + * For an untiled surface, this removes any existing fence. + * + * Returns: + * + * 0 on success, negative error code on failure. + */ +int +i915_vma_pin_fence(struct i915_vma *vma) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); + int err; + + err = mutex_lock_interruptible(&ggtt->vm.mutex); + if (err) + return err; + + err = __i915_vma_pin_fence(vma); mutex_unlock(&ggtt->vm.mutex); + return err; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index bfca8a4a88e2..f4dbe4199120 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1929,62 +1929,15 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) free_pt(&ppgtt->base.vm, pt); } -struct gen6_ppgtt_cleanup_work { - struct work_struct base; - struct i915_vma *vma; -}; - -static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) -{ - struct gen6_ppgtt_cleanup_work *work = - container_of(wrk, typeof(*work), base); - /* Side note, vma->vm is the GGTT not the ppgtt we just destroyed! */ - struct drm_i915_private *i915 = work->vma->vm->i915; - - mutex_lock(&i915->drm.struct_mutex); - i915_vma_destroy(work->vma); - mutex_unlock(&i915->drm.struct_mutex); - - kfree(work); -} - -static int nop_set_pages(struct i915_vma *vma) -{ - return -ENODEV; -} - -static void nop_clear_pages(struct i915_vma *vma) -{ -} - -static int nop_bind(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) -{ - return -ENODEV; -} - -static void nop_unbind(struct i915_vma *vma) -{ -} - -static const struct i915_vma_ops nop_vma_ops = { - .set_pages = nop_set_pages, - .clear_pages = nop_clear_pages, - .bind_vma = nop_bind, - .unbind_vma = nop_unbind, -}; - static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - struct gen6_ppgtt_cleanup_work *work = ppgtt->work; + struct i915_address_space *ggtt = ppgtt->vma->vm; - /* FIXME remove the struct_mutex to bring the locking under control */ - INIT_WORK(&work->base, gen6_ppgtt_cleanup_work); - work->vma = ppgtt->vma; - work->vma->ops = &nop_vma_ops; - schedule_work(&work->base); + mutex_lock(&ggtt->mutex); + GEM_BUG_ON(ppgtt->vma->vm != ggtt); + i915_vma_destroy(ppgtt->vma); + mutex_unlock(&ggtt->mutex); gen6_ppgtt_free_pd(ppgtt); gen6_ppgtt_free_scratch(vm); @@ -2159,15 +2112,9 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL); - if (!ppgtt->work) { - err = -ENOMEM; - goto err_free; - } - err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_work; + goto err_free; ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); if (IS_ERR(ppgtt->vma)) { @@ -2179,8 +2126,6 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) err_scratch: gen6_ppgtt_free_scratch(&ppgtt->base.vm); -err_work: - kfree(ppgtt->work); err_free: kfree(ppgtt); return ERR_PTR(err); @@ -2272,7 +2217,9 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); + mutex_lock(&vm->mutex); ppgtt_destroy_vma(vm); + mutex_unlock(&vm->mutex); GEM_BUG_ON(!list_empty(&vm->bound_list)); @@ -2944,11 +2891,12 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) ggtt->vm.closed = true; - mutex_lock(&dev_priv->drm.struct_mutex); fini_aliasing_ppgtt(dev_priv); + mutex_lock(&ggtt->vm.mutex); list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) WARN_ON(i915_vma_unbind(vma)); + mutex_unlock(&ggtt->vm.mutex); if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); @@ -2968,8 +2916,6 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) __pagevec_release(pvec); } - mutex_unlock(&dev_priv->drm.struct_mutex); - arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->iomap); @@ -3572,7 +3518,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) * beyond the end of the batch buffer, across the page boundary, * and beyond the end of the GTT if we do not provide a guard. */ - mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); ggtt->vm.is_ggtt = true; @@ -3582,7 +3527,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) if (!HAS_LLC(dev_priv) && !HAS_PPGTT(dev_priv)) ggtt->vm.mm.color_adjust = i915_gtt_color_adjust; - mutex_unlock(&dev_priv->drm.struct_mutex); if (!io_mapping_init_wc(&dev_priv->ggtt.iomap, dev_priv->ggtt.gmadr.start, @@ -3661,10 +3605,8 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) if (!(vma->flags & I915_VMA_GLOBAL_BIND)) continue; - mutex_unlock(&ggtt->vm.mutex); - if (!i915_vma_unbind(vma)) - goto lock; + continue; WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, @@ -3674,9 +3616,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); i915_gem_object_unlock(obj); } - -lock: - mutex_lock(&ggtt->vm.mutex); } ggtt->vm.closed = false; @@ -4073,7 +4012,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 offset; int err; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(alignment && !is_power_of_2(alignment)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0e9926b32408..806ba2816a69 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -439,8 +439,6 @@ struct gen6_ppgtt { unsigned int pin_count; bool scan_for_unused_pt; - - struct gen6_ppgtt_cleanup_work *work; }; #define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base) @@ -694,7 +692,6 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_OFFSET_BIAS BIT_ULL(6) #define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ #define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ #define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ #define PIN_UPDATE BIT_ULL(11) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2e33a9b4eae7..517c3a16f1cd 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1347,13 +1347,9 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) static void free_oa_buffer(struct drm_i915_private *i915) { - mutex_lock(&i915->drm.struct_mutex); - i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma, I915_VMA_RELEASE_MAP); - mutex_unlock(&i915->drm.struct_mutex); - i915->perf.oa.oa_buffer.vaddr = NULL; } @@ -1505,19 +1501,12 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) return -ENODEV; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; - BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE); - if (IS_ERR(bo)) { - DRM_ERROR("Failed to allocate OA buffer\n"); - ret = PTR_ERR(bo); - goto unlock; - } + if (IS_ERR(bo)) + return PTR_ERR(bo); i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); @@ -1539,8 +1528,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), dev_priv->perf.oa.oa_buffer.vaddr); - - goto unlock; + return 0; err_unpin: __i915_vma_unpin(vma); @@ -1551,8 +1539,6 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) dev_priv->perf.oa.oa_buffer.vaddr = NULL; dev_priv->perf.oa.oa_buffer.vma = NULL; -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 89079630d0af..68b45d3bba9b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -314,8 +314,6 @@ vma_lookup(struct drm_i915_gem_object *obj, * Once created, the VMA is kept until either the object is freed, or the * address space is closed. * - * Must be called with struct_mutex held. - * * Returns the vma, or an error pointer. */ struct i915_vma * @@ -448,7 +446,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) /* Access through the GTT requires the device to be awake. */ assert_rpm_wakelock_held(vma->vm->i915); - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + mutex_lock(&vma->vm->mutex); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; @@ -472,16 +471,19 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) __i915_vma_pin(vma); - err = i915_vma_pin_fence(vma); + err = __i915_vma_pin_fence(vma); if (err) goto err_unpin; i915_vma_set_ggtt_write(vma); + mutex_unlock(&vma->vm->mutex); + return ptr; err_unpin: __i915_vma_unpin(vma); err: + mutex_unlock(&vma->vm->mutex); return IO_ERR_PTR(err); } @@ -497,8 +499,6 @@ void i915_vma_flush_writes(struct i915_vma *vma) void i915_vma_unpin_iomap(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); i915_vma_flush_writes(vma); @@ -680,10 +680,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } if (vma->obj) { - ret = i915_gem_object_pin_pages_async(vma->obj); - if (ret) - return ret; - + __i915_gem_object_pin_pages(vma->obj); cache_level = vma->obj->cache_level; } else { cache_level = 0; @@ -748,9 +745,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); - mutex_lock(&vma->vm->mutex); list_add_tail(&vma->vm_link, &vma->vm->bound_list); - mutex_unlock(&vma->vm->mutex); if (vma->obj) { atomic_inc(&vma->obj->bind_count); @@ -773,10 +768,8 @@ i915_vma_remove(struct i915_vma *vma) vma->ops->clear_pages(vma); - mutex_lock(&vma->vm->mutex); drm_mm_remove_node(&vma->node); list_del(&vma->vm_link); - mutex_unlock(&vma->vm->mutex); /* * Since the unbound list is global, only move to that list if @@ -797,25 +790,20 @@ i915_vma_remove(struct i915_vma *vma) } } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) +int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { const unsigned int bound = vma->flags; int ret; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + lockdep_assert_held(&vma->vm->mutex); - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err_unpin; - } + if (atomic_read(&vma->pin_count)) + goto out; if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err_unpin; + return ret; } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); @@ -829,6 +817,9 @@ int __i915_vma_do_pin(struct i915_vma *vma, __i915_vma_set_map_and_fenceable(vma); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); + +out: + atomic_inc(&vma->pin_count); return 0; err_remove: @@ -837,11 +828,34 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(vma->pages); GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK); } -err_unpin: - __i915_vma_unpin(vma); return ret; } +int i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + int err; + + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + + if (vma->obj) { + err = i915_gem_object_pin_pages_async(vma->obj); + if (err) + return err; + } + + err = mutex_lock_interruptible(&vma->vm->mutex); + if (!err) { + err = __i915_vma_do_pin(vma, size, alignment, flags); + mutex_unlock(&vma->vm->mutex); + } + + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); + + return err; +} + void i915_vma_close(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; @@ -904,7 +918,7 @@ static void __i915_vma_destroy(struct i915_vma *vma) void i915_vma_destroy(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + lockdep_assert_held(&vma->vm->mutex); GEM_BUG_ON(i915_vma_is_pinned(vma)); @@ -922,10 +936,14 @@ void i915_vma_parked(struct drm_i915_private *i915) spin_lock_irq(&i915->gt.closed_lock); list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { + struct i915_address_space *vm = vma->vm; + list_del_init(&vma->closed_link); spin_unlock_irq(&i915->gt.closed_lock); + mutex_lock(&vm->mutex); i915_vma_destroy(vma); + mutex_unlock(&vm->mutex); spin_lock_irq(&i915->gt.closed_lock); } @@ -948,7 +966,8 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) struct drm_vma_offset_node *node = &vma->obj->base.vma_node; u64 vma_offset; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + lockdep_assert_held(&vma->vm->mutex); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); if (!i915_vma_has_userfault(vma)) return; @@ -1031,7 +1050,7 @@ int i915_vma_unbind(struct i915_vma *vma) { int ret; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + lockdep_assert_held(&vma->vm->mutex); if (vma->async.dma && dma_fence_wait_timeout(vma->async.dma, true, diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 67e43f5d01f6..675472c44a4b 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -72,38 +72,14 @@ struct i915_vma { * that exist in the ctx->handle_vmas LUT for this vma. */ atomic_t open_count; + atomic_t pin_count; unsigned long flags; - /** - * How many users have pinned this object in GTT space. - * - * This is a tightly bound, fairly small number of users, so we - * stuff inside the flags field so that we can both check for overflow - * and detect a no-op i915_vma_pin() in a single check, while also - * pinning the vma. - * - * The worst case display setup would have the same vma pinned for - * use on each plane on each crtc, while also building the next atomic - * state and holding a pin for the length of the cleanup queue. In the - * future, the flip queue may be increased from 1. - * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 - * - * For GEM, the number of concurrent users for pwrite/pread is - * unbounded. For execbuffer, it is currently one but will in future - * be extended to allow multiple clients to pin vma concurrently. - * - * We also use suballocated pages, with each suballocation claiming - * its own pin on the shared vma. At present, this is limited to - * exclusive cachelines of a single page, so a maximum of 64 possible - * users. - */ -#define I915_VMA_PIN_MASK 0xff -#define I915_VMA_PIN_OVERFLOW BIT(8) /** Flags and address space this VMA is bound to */ #define I915_VMA_GLOBAL_BIND BIT(9) #define I915_VMA_LOCAL_BIND BIT(10) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_ALLOC_BIND I915_VMA_PIN_OVERFLOW /* not stored */ +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) +#define I915_VMA_ALLOC_BIND BIT(0) /* not stored */ #define I915_VMA_GGTT BIT(11) #define I915_VMA_CAN_FENCE BIT(12) @@ -324,30 +300,27 @@ static inline void i915_vma_unlock(struct i915_vma *vma) reservation_object_unlock(vma->resv); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); +int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +int i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); + static inline int __must_check i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. - */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) { + if (atomic_add_unless(&vma->pin_count, 1, 0)) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; } - return __i915_vma_do_pin(vma, size, alignment, flags); + return i915_vma_do_pin(vma, size, alignment, flags); } static inline int i915_vma_pin_count(const struct i915_vma *vma) { - return vma->flags & I915_VMA_PIN_MASK; + return atomic_read(&vma->pin_count); } static inline bool i915_vma_is_pinned(const struct i915_vma *vma) @@ -357,19 +330,18 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { - vma->flags++; - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); + atomic_inc(&vma->pin_count); } static inline void __i915_vma_unpin(struct i915_vma *vma) { - vma->flags--; + atomic_dec(&vma->pin_count); } static inline void i915_vma_unpin(struct i915_vma *vma) { - GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(!atomic_read(&vma->pin_count)); __i915_vma_unpin(vma); } @@ -388,8 +360,6 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma, * the caller must call i915_vma_unpin_iomap to relinquish the pinning * after the iomapping is no longer required. * - * Callers must hold the struct_mutex. - * * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); @@ -401,8 +371,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). + * This function is only valid to be called on a VMA previously iomapped + * by the caller with i915_vma_pin_iomap(). */ void i915_vma_unpin_iomap(struct i915_vma *vma); @@ -412,6 +382,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) return sg_page(vma->pages->sgl); } +int __i915_vma_pin_fence(struct i915_vma *vma); + /** * i915_vma_pin_fence - pin fencing state * @vma: vma to pin fencing for @@ -447,7 +419,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma) static inline void i915_vma_unpin_fence(struct i915_vma *vma) { - /* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */ if (vma->fence) __i915_vma_unpin_fence(vma); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3a3298812ad8..aa94f4499dfe 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2093,8 +2093,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int pinctl; u32 alignment; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - alignment = intel_surf_alignment(fb, 0); /* Note that the w/a also requires 64 PTE of padding following the @@ -2175,13 +2173,9 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - - i915_gem_object_lock(vma->obj); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); - i915_gem_object_unlock(vma->obj); i915_vma_put(vma); } @@ -3077,12 +3071,10 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - mutex_unlock(&dev->struct_mutex); if (!obj) return false; @@ -3244,13 +3236,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, intel_state->color_plane[0].stride = intel_fb_pitch(fb, 0, intel_state->base.rotation); - mutex_lock(&dev->struct_mutex); intel_state->vma = intel_pin_and_fence_fb_obj(fb, &intel_state->view, intel_plane_uses_fence(intel_state), &intel_state->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(intel_state->vma)) { DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", intel_crtc->pipe, PTR_ERR(intel_state->vma)); @@ -14152,8 +14142,6 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) * bits. Some older platforms need special physical address handling for * cursor planes. * - * Must be called with struct_mutex held. - * * Returns 0 on success, negative error code on failure. */ int @@ -14210,15 +14198,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) return ret; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) { - i915_gem_object_unpin_pages(obj); - return ret; - } - ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; @@ -14267,8 +14248,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, * @old_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. - * - * Must be called with struct_mutex held. */ void intel_cleanup_plane_fb(struct drm_plane *plane, @@ -14284,9 +14263,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } /* Should only be called after a successful intel_prepare_plane_fb()! */ - mutex_lock(&dev_priv->drm.struct_mutex); intel_plane_unpin_fb(to_intel_plane_state(old_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); } int @@ -14490,7 +14467,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct drm_plane_state *old_plane_state, *new_plane_state; struct intel_plane *intel_plane = to_intel_plane(plane); struct intel_crtc_state *crtc_state = @@ -14556,13 +14532,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) - goto out_free; - ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); if (ret) - goto out_unlock; + goto out_free; intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP); intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb), @@ -14592,8 +14564,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); out_free: if (new_crtc_state) intel_crtc_destroy_state(crtc, &new_crtc_state->base); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index c7c11d1842af..1464157d0ffe 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -212,7 +212,6 @@ static int intelfb_create(struct drm_fb_helper *helper, sizes->fb_height = intel_fb->base.height; } - mutex_lock(&dev->struct_mutex); wakeref = intel_runtime_pm_get(dev_priv); /* Pin the GGTT vma for our access via info->screen_base. @@ -273,7 +272,6 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->vma_flags = flags; intel_runtime_pm_put(dev_priv, wakeref); - mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -281,7 +279,6 @@ static int intelfb_create(struct drm_fb_helper *helper, intel_unpin_fb_vma(vma, flags); out_unlock: intel_runtime_pm_put(dev_priv, wakeref); - mutex_unlock(&dev->struct_mutex); return ret; } @@ -298,11 +295,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) drm_fb_helper_fini(&ifbdev->helper); - if (ifbdev->vma) { - mutex_lock(&ifbdev->helper.dev->struct_mutex); + if (ifbdev->vma) intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags); - mutex_unlock(&ifbdev->helper.dev->struct_mutex); - } if (ifbdev->fb) drm_framebuffer_remove(&ifbdev->fb->base); diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 43232242d167..4a0e7691090f 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -673,6 +673,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) goto err; flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + ret = i915_vma_pin(vma, 0, 0, flags); if (ret) { vma = ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index dc7b66c94f74..cec765ca3614 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -752,7 +752,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct i915_vma *vma; int ret, tmp_width; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_release_old_vid(overlay); @@ -1308,15 +1307,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) struct i915_vma *vma; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); if (obj == NULL) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) { @@ -1337,13 +1332,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) } overlay->reg_bo = obj; - mutex_unlock(&i915->drm.struct_mutex); return 0; err_put_bo: i915_gem_object_put(obj); -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 615ac485c731..eeb0450c3f84 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -337,7 +337,9 @@ static int igt_vma_pin1(void *arg) if (!err) { i915_vma_unpin(vma); + mutex_lock(&ggtt->vm.mutex); err = i915_vma_unbind(vma); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("Failed to unbind single page from GGTT, err=%d\n", err); goto out; -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx