Quoting Tvrtko Ursulin (2020-07-14 13:19:22)
> 
> On 06/07/2020 07:19, Chris Wilson wrote:
> > It is illegal to wait on another vma while holding the vm->mutex, as
> > that easily leads to ABBA deadlocks (we wait on a second vma that waits
> > on us to release the vm->mutex). So while the vm->mutex exists, move the
> > waiting outside of the lock into the async binding pipeline.
> > 
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> > ---
> >  .../gpu/drm/i915/gem/i915_gem_execbuffer.c   |  21 +--
> >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 137 +++++++++++++++++-
> >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h |   5 +
> >  3 files changed, 151 insertions(+), 12 deletions(-)
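The ABBA cycle described above, reduced to a self-contained userspace
sketch: pthread mutexes stand in for vm->mutex and for whatever guards
the second vma. All names are invented for the illustration; none of
this is driver code.

#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t vm_mutex = PTHREAD_MUTEX_INITIALIZER; /* "vm->mutex" */
static pthread_mutex_t vma_lock = PTHREAD_MUTEX_INITIALIZER; /* "second vma" */

/* Thread A: takes vm->mutex, then waits on the second vma. */
static void *bind_thread(void *arg)
{
	pthread_mutex_lock(&vm_mutex);
	sleep(1);			/* let the other thread take vma_lock */
	pthread_mutex_lock(&vma_lock);	/* blocks forever: B holds it */
	pthread_mutex_unlock(&vma_lock);
	pthread_mutex_unlock(&vm_mutex);
	return NULL;
}

/* Thread B: holds the second vma, then needs vm->mutex to make progress. */
static void *wait_thread(void *arg)
{
	pthread_mutex_lock(&vma_lock);
	sleep(1);
	pthread_mutex_lock(&vm_mutex);	/* blocks forever: A holds it */
	pthread_mutex_unlock(&vm_mutex);
	pthread_mutex_unlock(&vma_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, bind_thread, NULL);
	pthread_create(&b, NULL, wait_thread, NULL);
	pthread_join(a, NULL);	/* never returns: A waits on B, B waits on A */
	pthread_join(b, NULL);
	return 0;
}

Moving the wait out from under vm->mutex, as the patch does via the
async binding pipeline, removes the hold-and-wait edge, so the cycle
cannot form.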
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > index 6a406e8798ef..c14c3b7e0dfd 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > @@ -1056,15 +1056,6 @@ static int eb_reserve_vma(struct eb_vm_work *work, struct eb_bind_vma *bind)
> >  		return err;
> >  
> >  pin:
> > -	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
> > -		err = __i915_vma_pin_fence(vma); /* XXX no waiting */
> > -		if (unlikely(err))
> > -			return err;
> > -
> > -		if (vma->fence)
> > -			bind->ev->flags |= __EXEC_OBJECT_HAS_FENCE;
> > -	}
> > -
> >  	bind_flags &= ~atomic_read(&vma->flags);
> >  	if (bind_flags) {
> >  		err = set_bind_fence(vma, work);
> > @@ -1095,6 +1086,15 @@ static int eb_reserve_vma(struct eb_vm_work *work, struct eb_bind_vma *bind)
> >  	bind->ev->flags |= __EXEC_OBJECT_HAS_PIN;
> >  	GEM_BUG_ON(eb_vma_misplaced(entry, vma, bind->ev->flags));
> >  
> > +	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
> > +		err = __i915_vma_pin_fence_async(vma, &work->base);
> > +		if (unlikely(err))
> > +			return err;
> > +
> > +		if (vma->fence)
> > +			bind->ev->flags |= __EXEC_OBJECT_HAS_FENCE;
> > +	}
> > +
> >  	return 0;
> >  }
> >  
> > @@ -1160,6 +1160,9 @@ static void __eb_bind_vma(struct eb_vm_work *work)
> >  		struct eb_bind_vma *bind = &work->bind[n];
> >  		struct i915_vma *vma = bind->ev->vma;
> >  
> > +		if (bind->ev->flags & __EXEC_OBJECT_HAS_FENCE)
> > +			__i915_vma_apply_fence_async(vma);
> > +
> >  		if (!bind->bind_flags)
> >  			goto put;
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > index 7fb36b12fe7a..734b6aa61809 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > @@ -21,10 +21,13 @@
> >   * IN THE SOFTWARE.
> >   */
> >  
> > +#include "i915_active.h"
> >  #include "i915_drv.h"
> >  #include "i915_scatterlist.h"
> > +#include "i915_sw_fence_work.h"
> >  #include "i915_pvinfo.h"
> >  #include "i915_vgpu.h"
> > +#include "i915_vma.h"
> >  
> >  /**
> >   * DOC: fence register handling
> > @@ -340,19 +343,37 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
> >  	return ERR_PTR(-EDEADLK);
> >  }
> >  
> > +static int fence_wait_bind(struct i915_fence_reg *reg)
> > +{
> > +	struct dma_fence *fence;
> > +	int err = 0;
> > +
> > +	fence = i915_active_fence_get(&reg->active.excl);
> > +	if (fence) {
> > +		err = dma_fence_wait(fence, true);
> > +		dma_fence_put(fence);
> > +	}
> > +
> > +	return err;
> > +}
> > +
> >  int __i915_vma_pin_fence(struct i915_vma *vma)
> >  {
> >  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
> > -	struct i915_fence_reg *fence;
> > +	struct i915_fence_reg *fence = vma->fence;
> >  	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> >  	int err;
> >  
> >  	lockdep_assert_held(&vma->vm->mutex);
> >  
> >  	/* Just update our place in the LRU if our fence is getting reused. */
> > -	if (vma->fence) {
> > -		fence = vma->fence;
> > +	if (fence) {
> >  		GEM_BUG_ON(fence->vma != vma);
> > +
> > +		err = fence_wait_bind(fence);
> > +		if (err)
> > +			return err;
> > +
> >  		atomic_inc(&fence->pin_count);
> >  		if (!fence->dirty) {
> >  			list_move_tail(&fence->link, &ggtt->fence_list);
> > @@ -384,6 +405,116 @@ int __i915_vma_pin_fence(struct i915_vma *vma)
> >  	return err;
> >  }
> >  
> > +static int set_bind_fence(struct i915_fence_reg *fence,
> > +			  struct dma_fence_work *work)
> > +{
> > +	struct dma_fence *prev;
> > +	int err;
> > +
> > +	if (rcu_access_pointer(fence->active.excl.fence) == &work->dma)
> > +		return 0;
> 
> What is this checking for?

Paranoia to avoid waiting upon ourselves. Should be possible to declare
it a GEM_BUG_ON.

> > +	err = i915_sw_fence_await_active(&work->chain,
> > +					 &fence->active,
> > +					 I915_ACTIVE_AWAIT_ACTIVE);
> > +	if (err)
> > +		return err;
> > +
> > +	if (i915_active_acquire(&fence->active))
> > +		return -ENOENT;
> > +
> > +	prev = i915_active_set_exclusive(&fence->active, &work->dma);
> > +	if (unlikely(prev)) {
> > +		err = i915_sw_fence_await_dma_fence(&work->chain, prev, 0,
> > +						    GFP_NOWAIT | __GFP_NOWARN);
> 
> This is a potential allocation under vm->mutex.

Aye, and it is not allowed to shrink because we hold vm->mutex. It's
the biggest problem with trying to build up a job with many
dependencies, where each vma may need to evict multiple others and have
a number of awaits.

> > +		dma_fence_put(prev);
> > +	}
> > +
> > +	i915_active_release(&fence->active);
> > +	return err < 0 ? err : 0;
> > +}
> > +
> > +int __i915_vma_pin_fence_async(struct i915_vma *vma,
> > +			       struct dma_fence_work *work)
> > +{
> > +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
> > +	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> > +	struct i915_fence_reg *fence = vma->fence;
> > +	int err;
> > +
> > +	lockdep_assert_held(&vma->vm->mutex);
> > +
> > +	/* Just update our place in the LRU if our fence is getting reused. */
> > +	if (fence) {
> > +		GEM_BUG_ON(fence->vma != vma);
> > +		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
> > +	} else if (set) {
> > +		if (!i915_vma_is_map_and_fenceable(vma))
> > +			return -EINVAL;
> > +
> > +		fence = fence_find(ggtt);
> > +		if (IS_ERR(fence))
> > +			return -ENOSPC;
> > +
> > +		GEM_BUG_ON(atomic_read(&fence->pin_count));
> > +		fence->dirty = true;
> > +	} else {
> > +		return 0;
> > +	}
> > +
> > +	atomic_inc(&fence->pin_count);
> > +	list_move_tail(&fence->link, &ggtt->fence_list);
> > +	if (!fence->dirty)
> > +		return 0;
> > +
> > +	if (INTEL_GEN(fence_to_i915(fence)) < 4 &&
> > +	    rcu_access_pointer(vma->active.excl.fence) != &work->dma) {
> 
> This second part is the same check as in set_bind_fence.

The await for implicit unfenced blits? That's subtly different.

> Should it go to a helper with a self-descriptive name and then this
> function would gate both the gen > 4 check and the set_bind_fence call
> under it?
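For concreteness, the helper being suggested might look something like
the sketch below. Note the subtlety from the reply above: the two sites
test different trackers (vma->active here, fence->active inside
set_bind_fence), so only the shape of the check is shared. The names
and the vma->vm->i915 backpointer are assumptions, not the patch, and
this is untested:

/* Hypothetical helpers sketching the suggestion; not from the patch. */
static bool work_is_installed(struct i915_active_fence *ref,
			      struct dma_fence_work *work)
{
	/* is the tracker's exclusive slot already our own work? */
	return rcu_access_pointer(ref->fence) == &work->dma;
}

static int await_unfenced_blits(struct i915_vma *vma,
				struct dma_fence_work *work)
{
	/* gen2/3: implicit 'unfenced' GPU blits must be waited upon */
	if (INTEL_GEN(vma->vm->i915) >= 4)
		return 0;

	if (work_is_installed(&vma->active.excl, work))
		return 0;

	return i915_sw_fence_await_active(&work->chain,
					  &vma->active,
					  I915_ACTIVE_AWAIT_ACTIVE);
}

__i915_vma_pin_fence_async() would then call await_unfenced_blits()
before set_bind_fence(), with work_is_installed() standing in for the
open-coded comparison at both sites.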
> > +		/* implicit 'unfenced' GPU blits */
> > +		err = i915_sw_fence_await_active(&work->chain,
> > +						 &vma->active,
> > +						 I915_ACTIVE_AWAIT_ACTIVE);
> > +		if (err)
> > +			goto err_unpin;
> > +	}
> > +
> > +	err = set_bind_fence(fence, work);
> > +	if (err)
> > +		goto err_unpin;
> > +
> > +	if (set) {
> > +		fence->start = vma->node.start;
> > +		fence->size = vma->fence_size;
> > +		fence->stride = i915_gem_object_get_stride(vma->obj);
> > +		fence->tiling = i915_gem_object_get_tiling(vma->obj);
> > +
> > +		vma->fence = fence;
> > +	} else {
> > +		fence->tiling = 0;
> > +		vma->fence = NULL;
> > +	}
> > +
> > +	set = xchg(&fence->vma, set);
> > +	if (set && set != vma) {
> > +		GEM_BUG_ON(set->fence != fence);
> > +		WRITE_ONCE(set->fence, NULL);
> > +		i915_vma_revoke_mmap(set);
> > +	}
> > +
> > +	return 0;
> > +
> > +err_unpin:
> > +	atomic_dec(&fence->pin_count);
> > +	return err;
> > +}
> > +
> > +void __i915_vma_apply_fence_async(struct i915_vma *vma)
> > +{
> > +	struct i915_fence_reg *fence = vma->fence;
> > +
> > +	if (fence->dirty)
> > +		fence_write(fence);
> 
> What is async in here?

This executes the async work. The first function was pin_fence_async,
so this was apply_fence_async because that looked symmetrical.
-Chris
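To make that pin/apply symmetry concrete: the pin step runs under
vm->mutex and only queues the waits, while the apply step is what the
worker executes once those waits have signalled. A toy stand-alone
sketch of that prepare/commit split, with a plain worker thread in
place of dma_fence_work and every name invented:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vm_mutex = PTHREAD_MUTEX_INITIALIZER;

struct bind_work {
	int deps_queued;	/* stands in for the i915_sw_fence awaits */
	int fence_dirty;	/* stands in for fence->dirty */
};

/* "pin": under the lock, record what must happen, but never block. */
static void pin_fence_async(struct bind_work *w)
{
	pthread_mutex_lock(&vm_mutex);
	w->deps_queued = 1;	/* cf. i915_sw_fence_await_active() */
	w->fence_dirty = 1;
	pthread_mutex_unlock(&vm_mutex);
}

/* "apply": in the worker, after the queued dependencies signalled. */
static void *apply_fence_async(void *arg)
{
	struct bind_work *w = arg;

	if (w->fence_dirty)
		printf("fence_write()\n");	/* the deferred register write */
	return NULL;
}

int main(void)
{
	struct bind_work w = { 0 };
	pthread_t worker;

	pin_fence_async(&w);	/* cheap and non-blocking under "vm->mutex" */
	pthread_create(&worker, NULL, apply_fence_async, &w);
	pthread_join(worker, NULL);
	return 0;
}

So nothing inside the apply step is itself asynchronous; it is the
deferred half of the asynchronous pin, which is the symmetry the naming
leans on.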