On Tue, Apr 20, 2021 at 12:34:50PM +0200, Maarten Lankhorst wrote: > Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim > tainting the ggtt->mutex") moves the vma binding to dma_fence_work, > but dma_fence_work has has signalling fence semantics. > > On braswell, we can call stop_machine inside fence_work, causing a splat > because memory allocation inside stop_machine is allowed. > > This patch does not fix that lockdep splat yet, but will allow the next > patch to remove it. > > Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx> > Acked-by: Daniel Vetter <daniel.vetter@xxxxxxxx> Thomas pointed out that this removes pipelined pte writing for both ggtt and ppgtt, and that's a bit much. So need to retract my ack here. -Daniel > --- > drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 - > drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 1 - > drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 1 - > drivers/gpu/drm/i915/gt/intel_ggtt.c | 4 - > drivers/gpu/drm/i915/gt/intel_gtt.h | 2 - > drivers/gpu/drm/i915/i915_gem.c | 6 - > drivers/gpu/drm/i915/i915_vma.c | 174 +++------------------ > drivers/gpu/drm/i915/i915_vma.h | 5 +- > drivers/gpu/drm/i915/i915_vma_types.h | 3 - > 9 files changed, 21 insertions(+), 178 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > index b0597de206de..ec04df0a3b89 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > @@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt, > if (!drm_mm_node_allocated(&ggtt->error_capture)) > return; > > - if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND) > - return; /* beware stop_machine() inversion */ > - > GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); > > mutex_lock(&ggtt->error_mutex); > diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c > index e08dff376339..0c5a9a767849 100644 > --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c > +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c > @@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) > ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); > ppgtt->base.vm.top = 1; > > - ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; > ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; > ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; > ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; > diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c > index 176c19633412..92f8a23e66cc 100644 > --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c > +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c > @@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) > goto err_free_pd; > } > > - ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; > ppgtt->vm.insert_entries = gen8_ppgtt_insert; > ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; > ppgtt->vm.clear_range = gen8_ppgtt_clear; > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c > index 670c1271e7d5..e1ec6edae1fb 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c > @@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt) > > list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { > GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); > - i915_vma_wait_for_bind(vma); > > if (i915_vma_is_pinned(vma)) > continue; > @@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) > ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total); > > ggtt->alias = ppgtt; > - ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; > > GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); > ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; > @@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) > IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { > ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; > ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; > - ggtt->vm.bind_async_flags = > - I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; > } > > ggtt->invalidate = gen8_ggtt_invalidate; > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h > index e67e34e17913..d9d2ca8b4b61 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h > @@ -230,8 +230,6 @@ struct i915_address_space { > u64 total; /* size addr space maps (ex. 2GB for ggtt) */ > u64 reserved; /* size addr space reserved */ > > - unsigned int bind_async_flags; > - > /* > * Each active user context has its own address space (in full-ppgtt). > * Since the vm may be shared between multiple contexts, we count how > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index b23f58e94cfb..4639c47c038b 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, > mutex_unlock(&ggtt->vm.mutex); > } > > - ret = i915_vma_wait_for_bind(vma); > - if (ret) { > - i915_vma_unpin(vma); > - return ERR_PTR(ret); > - } > - > return vma; > } > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index 07490db51cdc..e4b2590d41ce 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj, > return vma; > } > > -struct i915_vma_work { > - struct dma_fence_work base; > - struct i915_address_space *vm; > - struct i915_vm_pt_stash stash; > - struct i915_vma *vma; > - struct drm_i915_gem_object *pinned; > - struct i915_sw_dma_fence_cb cb; > - enum i915_cache_level cache_level; > - unsigned int flags; > -}; > - > -static int __vma_bind(struct dma_fence_work *work) > -{ > - struct i915_vma_work *vw = container_of(work, typeof(*vw), base); > - struct i915_vma *vma = vw->vma; > - > - vma->ops->bind_vma(vw->vm, &vw->stash, > - vma, vw->cache_level, vw->flags); > - return 0; > -} > - > -static void __vma_release(struct dma_fence_work *work) > -{ > - struct i915_vma_work *vw = container_of(work, typeof(*vw), base); > - > - if (vw->pinned) { > - __i915_gem_object_unpin_pages(vw->pinned); > - i915_gem_object_put(vw->pinned); > - } > - > - i915_vm_free_pt_stash(vw->vm, &vw->stash); > - i915_vm_put(vw->vm); > -} > - > -static const struct dma_fence_work_ops bind_ops = { > - .name = "bind", > - .work = __vma_bind, > - .release = __vma_release, > -}; > - > -struct i915_vma_work *i915_vma_work(void) > -{ > - struct i915_vma_work *vw; > - > - vw = kzalloc(sizeof(*vw), GFP_KERNEL); > - if (!vw) > - return NULL; > - > - dma_fence_work_init(&vw->base, &bind_ops); > - vw->base.dma.error = -EAGAIN; /* disable the worker by default */ > - > - return vw; > -} > - > -int i915_vma_wait_for_bind(struct i915_vma *vma) > -{ > - int err = 0; > - > - if (rcu_access_pointer(vma->active.excl.fence)) { > - struct dma_fence *fence; > - > - rcu_read_lock(); > - fence = dma_fence_get_rcu_safe(&vma->active.excl.fence); > - rcu_read_unlock(); > - if (fence) { > - err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT); > - dma_fence_put(fence); > - } > - } > - > - return err; > -} > - > /** > * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. > * @vma: VMA to map > @@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma) > */ > int i915_vma_bind(struct i915_vma *vma, > enum i915_cache_level cache_level, > - u32 flags, > - struct i915_vma_work *work) > + u32 flags, struct i915_vm_pt_stash *stash) > { > u32 bind_flags; > u32 vma_flags; > @@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma, > GEM_BUG_ON(!vma->pages); > > trace_i915_vma_bind(vma, bind_flags); > - if (work && bind_flags & vma->vm->bind_async_flags) { > - struct dma_fence *prev; > - > - work->vma = vma; > - work->cache_level = cache_level; > - work->flags = bind_flags; > - > - /* > - * Note we only want to chain up to the migration fence on > - * the pages (not the object itself). As we don't track that, > - * yet, we have to use the exclusive fence instead. > - * > - * Also note that we do not want to track the async vma as > - * part of the obj->resv->excl_fence as it only affects > - * execution and not content or object's backing store lifetime. > - */ > - prev = i915_active_set_exclusive(&vma->active, &work->base.dma); > - if (prev) { > - __i915_sw_fence_await_dma_fence(&work->base.chain, > - prev, > - &work->cb); > - dma_fence_put(prev); > - } > - > - work->base.dma.error = 0; /* enable the queue_work() */ > - > - if (vma->obj) { > - __i915_gem_object_pin_pages(vma->obj); > - work->pinned = i915_gem_object_get(vma->obj); > - } > - } else { > - vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); > - } > + vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags); > > atomic_or(bind_flags, &vma->flags); > return 0; > @@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma, > if (!drm_mm_node_allocated(&vma->node)) > return false; > > - if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) > - return true; > - > if (vma->node.size < size) > return true; > > @@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) > if (unlikely(flags & ~bound)) > return false; > > - if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) > + if (unlikely(bound & I915_VMA_OVERFLOW)) > return false; > > if (!(bound & I915_VMA_PIN_MASK)) > @@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) > */ > mutex_lock(&vma->vm->mutex); > do { > - if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { > + if (unlikely(bound & I915_VMA_OVERFLOW)) { > pinned = false; > break; > } > @@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma) > int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > u64 size, u64 alignment, u64 flags) > { > - struct i915_vma_work *work = NULL; > intel_wakeref_t wakeref = 0; > unsigned int bound; > int err; > + struct i915_vm_pt_stash stash = {}; > > #ifdef CONFIG_PROVE_LOCKING > if (debug_locks && !WARN_ON(!ww) && vma->resv) > @@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > if (flags & PIN_GLOBAL) > wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); > > - if (flags & vma->vm->bind_async_flags) { > - /* lock VM */ > - err = i915_vm_lock_objects(vma->vm, ww); > + if (vma->vm->allocate_va_range) { > + err = i915_vm_alloc_pt_stash(vma->vm, > + &stash, > + vma->size); > if (err) > goto err_rpm; > > - work = i915_vma_work(); > - if (!work) { > - err = -ENOMEM; > + err = i915_vm_lock_objects(vma->vm, ww); > + if (err) > goto err_rpm; > - } > > - work->vm = i915_vm_get(vma->vm); > - > - /* Allocate enough page directories to used PTE */ > - if (vma->vm->allocate_va_range) { > - err = i915_vm_alloc_pt_stash(vma->vm, > - &work->stash, > - vma->size); > - if (err) > - goto err_fence; > - > - err = i915_vm_pin_pt_stash(vma->vm, > - &work->stash); > - if (err) > - goto err_fence; > - } > + err = i915_vm_pin_pt_stash(vma->vm, > + &stash); > + if (err) > + goto err_rpm; > } > > /* > @@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > err = mutex_lock_interruptible_nested(&vma->vm->mutex, > !(flags & PIN_GLOBAL)); > if (err) > - goto err_fence; > + goto err_rpm; > > /* No more allocations allowed now we hold vm->mutex */ > > @@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > } > > bound = atomic_read(&vma->flags); > - if (unlikely(bound & I915_VMA_ERROR)) { > - err = -ENOMEM; > - goto err_unlock; > - } > - > if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) { > err = -EAGAIN; /* pins are meant to be fairly temporary */ > goto err_unlock; > @@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > GEM_BUG_ON(!vma->pages); > err = i915_vma_bind(vma, > vma->obj ? vma->obj->cache_level : 0, > - flags, work); > + flags, &stash); > if (err) > goto err_remove; > > @@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > i915_active_release(&vma->active); > err_unlock: > mutex_unlock(&vma->vm->mutex); > -err_fence: > - if (work) > - dma_fence_work_commit_imm(&work->base); > err_rpm: > + i915_vm_free_pt_stash(vma->vm, &stash); > if (wakeref) > intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); > vma_put_pages(vma); > @@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); > else > err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); > - if (err != -ENOSPC) { > - if (!err) { > - err = i915_vma_wait_for_bind(vma); > - if (err) > - i915_vma_unpin(vma); > - } > + if (err != -ENOSPC) > return err; > - } > > /* Unlike i915_vma_pin, we don't take no for an answer! */ > flush_idle_contexts(vm->gt); > @@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma) > trace_i915_vma_unbind(vma); > vma->ops->unbind_vma(vma->vm, vma); > } > - atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE), > + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE), > &vma->flags); > > i915_vma_detach(vma); > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h > index 8df784a026d2..d1d0fc76ef99 100644 > --- a/drivers/gpu/drm/i915/i915_vma.h > +++ b/drivers/gpu/drm/i915/i915_vma.h > @@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma, > return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); > } > > -struct i915_vma_work *i915_vma_work(void); > int i915_vma_bind(struct i915_vma *vma, > enum i915_cache_level cache_level, > u32 flags, > - struct i915_vma_work *work); > + struct i915_vm_pt_stash *stash); > > bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); > bool i915_vma_misplaced(const struct i915_vma *vma, > @@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); > void i915_vma_make_shrinkable(struct i915_vma *vma); > void i915_vma_make_purgeable(struct i915_vma *vma); > > -int i915_vma_wait_for_bind(struct i915_vma *vma); > - > static inline int i915_vma_sync(struct i915_vma *vma) > { > /* Wait for the asynchronous bindings and pending GPU reads */ > diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h > index 6b1bfa230b82..82868a755a96 100644 > --- a/drivers/gpu/drm/i915/i915_vma_types.h > +++ b/drivers/gpu/drm/i915/i915_vma_types.h > @@ -240,9 +240,6 @@ struct i915_vma { > > #define I915_VMA_ALLOC_BIT 12 > > -#define I915_VMA_ERROR_BIT 13 > -#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) > - > #define I915_VMA_GGTT_BIT 14 > #define I915_VMA_CAN_FENCE_BIT 15 > #define I915_VMA_USERFAULT_BIT 16 > -- > 2.31.0 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel