Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex") moves the vma binding to dma_fence_work, but dma_fence_work has signalling fence semantics. On braswell, we can call stop_machine inside fence_work, causing a splat because memory allocation inside stop_machine is not allowed. This patch does not fix that lockdep splat yet, but will allow the next patch to remove it. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx> Acked-by: Daniel Vetter <daniel.vetter@xxxxxxxx> --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 - drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 1 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 1 - drivers/gpu/drm/i915/gt/intel_ggtt.c | 4 - drivers/gpu/drm/i915/gt/intel_gtt.h | 2 - drivers/gpu/drm/i915/i915_gem.c | 6 - drivers/gpu/drm/i915/i915_vma.c | 174 +++------------------ drivers/gpu/drm/i915/i915_vma.h | 5 +- drivers/gpu/drm/i915/i915_vma_types.h | 3 - 9 files changed, 21 insertions(+), 178 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b0597de206de..ec04df0a3b89 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt, if (!drm_mm_node_allocated(&ggtt->error_capture)) return; - if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND) - return; /* beware stop_machine() inversion */ - GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); mutex_lock(&ggtt->error_mutex); diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index e08dff376339..0c5a9a767849 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); ppgtt->base.vm.top = 1; - ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; 
ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 176c19633412..92f8a23e66cc 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) goto err_free_pd; } - ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 670c1271e7d5..e1ec6edae1fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt) list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - i915_vma_wait_for_bind(vma); if (i915_vma_is_pinned(vma)) continue; @@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total); ggtt->alias = ppgtt; - ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; @@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; - ggtt->vm.bind_async_flags = - I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; } ggtt->invalidate = gen8_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index e67e34e17913..d9d2ca8b4b61 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -230,8 +230,6 @@ struct i915_address_space { u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ - unsigned int bind_async_flags; - /* * Each active user context has its own address space (in full-ppgtt). * Since the vm may be shared between multiple contexts, we count how diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b23f58e94cfb..4639c47c038b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, mutex_unlock(&ggtt->vm.mutex); } - ret = i915_vma_wait_for_bind(vma); - if (ret) { - i915_vma_unpin(vma); - return ERR_PTR(ret); - } - return vma; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 07490db51cdc..e4b2590d41ce 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj, return vma; } -struct i915_vma_work { - struct dma_fence_work base; - struct i915_address_space *vm; - struct i915_vm_pt_stash stash; - struct i915_vma *vma; - struct drm_i915_gem_object *pinned; - struct i915_sw_dma_fence_cb cb; - enum i915_cache_level cache_level; - unsigned int flags; -}; - -static int __vma_bind(struct dma_fence_work *work) -{ - struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - struct i915_vma *vma = vw->vma; - - vma->ops->bind_vma(vw->vm, &vw->stash, - vma, vw->cache_level, vw->flags); - return 0; -} - -static void __vma_release(struct dma_fence_work *work) -{ - struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - - if (vw->pinned) { - __i915_gem_object_unpin_pages(vw->pinned); - i915_gem_object_put(vw->pinned); - } - - i915_vm_free_pt_stash(vw->vm, &vw->stash); - i915_vm_put(vw->vm); -} - -static const struct dma_fence_work_ops 
bind_ops = { - .name = "bind", - .work = __vma_bind, - .release = __vma_release, -}; - -struct i915_vma_work *i915_vma_work(void) -{ - struct i915_vma_work *vw; - - vw = kzalloc(sizeof(*vw), GFP_KERNEL); - if (!vw) - return NULL; - - dma_fence_work_init(&vw->base, &bind_ops); - vw->base.dma.error = -EAGAIN; /* disable the worker by default */ - - return vw; -} - -int i915_vma_wait_for_bind(struct i915_vma *vma) -{ - int err = 0; - - if (rcu_access_pointer(vma->active.excl.fence)) { - struct dma_fence *fence; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&vma->active.excl.fence); - rcu_read_unlock(); - if (fence) { - err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT); - dma_fence_put(fence); - } - } - - return err; -} - /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map @@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma) */ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags, - struct i915_vma_work *work) + u32 flags, struct i915_vm_pt_stash *stash) { u32 bind_flags; u32 vma_flags; @@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma, GEM_BUG_ON(!vma->pages); trace_i915_vma_bind(vma, bind_flags); - if (work && bind_flags & vma->vm->bind_async_flags) { - struct dma_fence *prev; - - work->vma = vma; - work->cache_level = cache_level; - work->flags = bind_flags; - - /* - * Note we only want to chain up to the migration fence on - * the pages (not the object itself). As we don't track that, - * yet, we have to use the exclusive fence instead. - * - * Also note that we do not want to track the async vma as - * part of the obj->resv->excl_fence as it only affects - * execution and not content or object's backing store lifetime. 
- */ - prev = i915_active_set_exclusive(&vma->active, &work->base.dma); - if (prev) { - __i915_sw_fence_await_dma_fence(&work->base.chain, - prev, - &work->cb); - dma_fence_put(prev); - } - - work->base.dma.error = 0; /* enable the queue_work() */ - - if (vma->obj) { - __i915_gem_object_pin_pages(vma->obj); - work->pinned = i915_gem_object_get(vma->obj); - } - } else { - vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); - } + vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags); atomic_or(bind_flags, &vma->flags); return 0; @@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma, if (!drm_mm_node_allocated(&vma->node)) return false; - if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) - return true; - if (vma->node.size < size) return true; @@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) if (unlikely(flags & ~bound)) return false; - if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) + if (unlikely(bound & I915_VMA_OVERFLOW)) return false; if (!(bound & I915_VMA_PIN_MASK)) @@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) */ mutex_lock(&vma->vm->mutex); do { - if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { + if (unlikely(bound & I915_VMA_OVERFLOW)) { pinned = false; break; } @@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma) int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u64 size, u64 alignment, u64 flags) { - struct i915_vma_work *work = NULL; intel_wakeref_t wakeref = 0; unsigned int bound; int err; + struct i915_vm_pt_stash stash = {}; #ifdef CONFIG_PROVE_LOCKING if (debug_locks && !WARN_ON(!ww) && vma->resv) @@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); - if (flags & vma->vm->bind_async_flags) { - /* lock VM */ - err = 
i915_vm_lock_objects(vma->vm, ww); + if (vma->vm->allocate_va_range) { + err = i915_vm_alloc_pt_stash(vma->vm, + &stash, + vma->size); if (err) goto err_rpm; - work = i915_vma_work(); - if (!work) { - err = -ENOMEM; + err = i915_vm_lock_objects(vma->vm, ww); + if (err) goto err_rpm; - } - work->vm = i915_vm_get(vma->vm); - - /* Allocate enough page directories to used PTE */ - if (vma->vm->allocate_va_range) { - err = i915_vm_alloc_pt_stash(vma->vm, - &work->stash, - vma->size); - if (err) - goto err_fence; - - err = i915_vm_pin_pt_stash(vma->vm, - &work->stash); - if (err) - goto err_fence; - } + err = i915_vm_pin_pt_stash(vma->vm, + &stash); + if (err) + goto err_rpm; } /* @@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, err = mutex_lock_interruptible_nested(&vma->vm->mutex, !(flags & PIN_GLOBAL)); if (err) - goto err_fence; + goto err_rpm; /* No more allocations allowed now we hold vm->mutex */ @@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, } bound = atomic_read(&vma->flags); - if (unlikely(bound & I915_VMA_ERROR)) { - err = -ENOMEM; - goto err_unlock; - } - if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) { err = -EAGAIN; /* pins are meant to be fairly temporary */ goto err_unlock; @@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!vma->pages); err = i915_vma_bind(vma, vma->obj ? 
vma->obj->cache_level : 0, - flags, work); + flags, &stash); if (err) goto err_remove; @@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, i915_active_release(&vma->active); err_unlock: mutex_unlock(&vma->vm->mutex); -err_fence: - if (work) - dma_fence_work_commit_imm(&work->base); err_rpm: + i915_vm_free_pt_stash(vma->vm, &stash); if (wakeref) intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); vma_put_pages(vma); @@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); else err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); - if (err != -ENOSPC) { - if (!err) { - err = i915_vma_wait_for_bind(vma); - if (err) - i915_vma_unpin(vma); - } + if (err != -ENOSPC) return err; - } /* Unlike i915_vma_pin, we don't take no for an answer! */ flush_idle_contexts(vm->gt); @@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma) trace_i915_vma_unbind(vma); vma->ops->unbind_vma(vma->vm, vma); } - atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE), + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE), &vma->flags); i915_vma_detach(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 8df784a026d2..d1d0fc76ef99 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma, return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); } -struct i915_vma_work *i915_vma_work(void); int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags, - struct i915_vma_work *work); + struct i915_vm_pt_stash *stash); bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); bool i915_vma_misplaced(const struct i915_vma *vma, @@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); void i915_vma_make_shrinkable(struct i915_vma 
*vma); void i915_vma_make_purgeable(struct i915_vma *vma); -int i915_vma_wait_for_bind(struct i915_vma *vma); - static inline int i915_vma_sync(struct i915_vma *vma) { /* Wait for the asynchronous bindings and pending GPU reads */ diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 6b1bfa230b82..82868a755a96 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -240,9 +240,6 @@ struct i915_vma { #define I915_VMA_ALLOC_BIT 12 -#define I915_VMA_ERROR_BIT 13 -#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) - #define I915_VMA_GGTT_BIT 14 #define I915_VMA_CAN_FENCE_BIT 15 #define I915_VMA_USERFAULT_BIT 16 -- 2.31.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx