On Mon, 10 Jun 2019 at 08:21, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
>
> If we let pages be allocated asynchronously, we also then want to push
> the binding process into an asynchronous task. Make it so, utilising the
> recent improvements to fence error tracking and struct_mutex reduction.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

[snip]

> +static int queue_async_bind(struct i915_vma *vma,
> +                            enum i915_cache_level cache_level,
> +                            u32 flags)
> +{
> +        bool ready = true;
> +
> +        /* We are not allowed to shrink inside vm->mutex! */
> +        vma->async.dma = kmalloc(sizeof(*vma->async.dma),
> +                                 GFP_NOWAIT | __GFP_NOWARN);
> +        if (!vma->async.dma)
> +                return -ENOMEM;
> +
> +        dma_fence_init(vma->async.dma,
> +                       &async_bind_ops,
> +                       &async_lock,
> +                       vma->vm->i915->mm.unordered_timeline,
> +                       0);
> +
> +        /* XXX find and avoid allocations under reservation_object locks */
> +        if (!i915_vma_trylock(vma)) {
> +                kfree(fetch_and_zero(&vma->async.dma));
> +                return -EAGAIN;
> +        }
> +
> +        if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
> +                struct dma_fence *f = reservation_object_get_excl(vma->resv);
> +
> +                if (!dma_fence_add_callback(f,
> +                                            &vma->async.cb,
> +                                            __queue_async_bind))
> +                        ready = false;
> +        }
> +        reservation_object_add_excl_fence(vma->resv, vma->async.dma);
> +        i915_vma_unlock(vma);
> +
> +        i915_vm_get(vma->vm);
> +        i915_vma_get(vma);
> +        __i915_vma_pin(vma); /* avoid being shrunk */
> +
> +        vma->async.cache_level = cache_level;
> +        vma->async.flags = flags;

Do we need to move this stuff (and the vm/vma get + pin above it) to before
the add_callback, in case the callback fires as soon as it is installed?
Rough sketch of what I mean at the bottom of this mail.

> +
> +        if (ready)
> +                __queue_async_bind(vma->async.dma, &vma->async.cb);
> +
> +        return 0;
> +}
> +
>  /**
>   * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
>   * @vma: VMA to map
> @@ -293,17 +405,12 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>          u32 vma_flags;
>          int ret;
>
> +        GEM_BUG_ON(!flags);
>          GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>          GEM_BUG_ON(vma->size > vma->node.size);
> -
> -        if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
> -                                              vma->node.size,
> -                                              vma->vm->total)))
> -                return -ENODEV;
> -
> -        if (GEM_DEBUG_WARN_ON(!flags))
> -                return -EINVAL;
> -
> +        GEM_BUG_ON(range_overflows(vma->node.start,
> +                                   vma->node.size,
> +                                   vma->vm->total));
>          bind_flags = 0;
>          if (flags & PIN_GLOBAL)
>                  bind_flags |= I915_VMA_GLOBAL_BIND;
> @@ -318,14 +425,18 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,

Are we aiming for vma_bind/vma_bind_async/vma_pin/vma_pin_async etc?

>          if (bind_flags == 0)
>                  return 0;
>
> -        GEM_BUG_ON(!vma->pages);
> +        if ((bind_flags & ~vma_flags) & I915_VMA_LOCAL_BIND)
> +                bind_flags |= I915_VMA_ALLOC_BIND;
>
>          trace_i915_vma_bind(vma, bind_flags);
> -        ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
> +        if (bind_flags & I915_VMA_ALLOC_BIND)
> +                ret = queue_async_bind(vma, cache_level, bind_flags);
> +        else
> +                ret = __vma_bind(vma, cache_level, bind_flags);
>          if (ret)
>                  return ret;

Looks like clear_pages() is still called unconditionally in i915_vma_remove()
if we error out here, even though vma->pages is now only set as part of the
worker?
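
To make that concrete, I was expecting the unwind to grow a guard along these
lines (hand-written sketch against my reading of the patch, not tested, and
the exact spot in i915_vma_remove() may well differ):

        /*
         * Sketch only: with the bind queued asynchronously, vma->pages is
         * only populated once the worker has run set_pages(), so only tear
         * it down if it was actually acquired before we errored out.
         */
        if (vma->pages)
                vma->ops->clear_pages(vma);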
>
> -        vma->flags |= bind_flags;
> +        vma->flags |= bind_flags & ~I915_VMA_ALLOC_BIND;
>          return 0;
>  }
>
> @@ -569,7 +680,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>          }
>
>          if (vma->obj) {
> -                ret = i915_gem_object_pin_pages(vma->obj);
> +                ret = i915_gem_object_pin_pages_async(vma->obj);
>                  if (ret)
>                          return ret;
>
> @@ -578,25 +689,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>                  cache_level = 0;
>          }
>
> -        GEM_BUG_ON(vma->pages);
> -
> -        ret = vma->ops->set_pages(vma);
> -        if (ret)
> -                goto err_unpin;
> -

I guess one casualty here is the !offset_fixed path for huge-gtt-pages, since
we need to inspect the vma->page_sizes below. Though probably not the end of
the world if that's the case.

>          if (flags & PIN_OFFSET_FIXED) {
>                  u64 offset = flags & PIN_OFFSET_MASK;
>                  if (!IS_ALIGNED(offset, alignment) ||
>                      range_overflows(offset, size, end)) {
>                          ret = -EINVAL;
> -                        goto err_clear;
> +                        goto err_unpin;
>                  }
>
>                  ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
>                                             size, offset, cache_level,
>                                             flags);
>                  if (ret)
> -                        goto err_clear;
> +                        goto err_unpin;
>          } else {
>                  /*
>                   * We only support huge gtt pages through the 48b PPGTT,
> @@ -635,7 +740,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>                                            size, alignment, cache_level,
>                                            start, end, flags);
>                  if (ret)
> -                        goto err_clear;
> +                        goto err_unpin;
>
>                  GEM_BUG_ON(vma->node.start < start);
>                  GEM_BUG_ON(vma->node.start + vma->node.size > end);
> @@ -654,8 +759,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>
>          return 0;
>
> -err_clear:
> -        vma->ops->clear_pages(vma);
>  err_unpin:
>          if (vma->obj)
>                  i915_gem_object_unpin_pages(vma->obj);
> @@ -790,7 +893,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>
>          spin_lock(&obj->vma.lock);
>          list_del(&vma->obj_link);
> -        rb_erase(&vma->obj_node, &vma->obj->vma.tree);
> +        rb_erase(&vma->obj_node, &obj->vma.tree);
>          spin_unlock(&obj->vma.lock);
>  }
>
> @@ -930,6 +1033,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>
>          lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
>
> +        if (vma->async.dma &&
> +            dma_fence_wait_timeout(vma->async.dma, true,
> +                                   MAX_SCHEDULE_TIMEOUT) < 0)
> +                return -EINTR;
> +
>          ret = i915_active_wait(&vma->active);
>          if (ret)
>                  return ret;
> @@ -975,6 +1083,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>          }
>          vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
>
> +        dma_fence_put(fetch_and_zero(&vma->async.dma));
> +
>          i915_vma_remove(vma);
>
>          return 0;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 71088ff4ad59..67e43f5d01f6 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -103,6 +103,7 @@ struct i915_vma {
>  #define I915_VMA_GLOBAL_BIND    BIT(9)
>  #define I915_VMA_LOCAL_BIND     BIT(10)
>  #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
> +#define I915_VMA_ALLOC_BIND     I915_VMA_PIN_OVERFLOW /* not stored */
>
>  #define I915_VMA_GGTT           BIT(11)
>  #define I915_VMA_CAN_FENCE      BIT(12)
> @@ -143,6 +144,14 @@ struct i915_vma {
>          unsigned int *exec_flags;
>          struct hlist_node exec_node;
>          u32 exec_handle;
> +
> +        struct i915_vma_async_bind {
> +                struct dma_fence *dma;
> +                struct dma_fence_cb cb;
> +                struct work_struct work;
> +                unsigned int cache_level;
> +                unsigned int flags;
> +        } async;
>  };
>
>  struct i915_vma *
> @@ -305,6 +314,11 @@ static inline void i915_vma_lock(struct i915_vma *vma)
>          reservation_object_lock(vma->resv, NULL);
>  }
>
> +static inline bool i915_vma_trylock(struct i915_vma *vma)
> +{
> +        return reservation_object_trylock(vma->resv);
> +}
> +
>  static inline void i915_vma_unlock(struct i915_vma *vma)
>  {
>          reservation_object_unlock(vma->resv);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index a166d9405a94..615ac485c731 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -204,8 +204,10 @@ static int igt_vma_create(void *arg)
>                  mock_context_close(ctx);
>          }
>
> -        list_for_each_entry_safe(obj, on, &objects, st_link)
> +        list_for_each_entry_safe(obj, on, &objects, st_link) {
> +                i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT);
>                  i915_gem_object_put(obj);
> +        }
>          return err;
>  }
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
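
Re the add_callback ordering question above: this is roughly what I had in
mind, an untested rearrangement of the queue_async_bind() hunk quoted above
(picking up after i915_vma_trylock() has succeeded), so that everything
__queue_async_bind() consumes is already published before the callback can
run:

        /*
         * Sketch only, not tested: take the references and fill in the
         * async state before installing the callback, as the fence may
         * signal and invoke __queue_async_bind() the moment the callback
         * is added.
         */
        i915_vm_get(vma->vm);
        i915_vma_get(vma);
        __i915_vma_pin(vma); /* avoid being shrunk */

        vma->async.cache_level = cache_level;
        vma->async.flags = flags;

        if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
                struct dma_fence *f = reservation_object_get_excl(vma->resv);

                if (!dma_fence_add_callback(f,
                                            &vma->async.cb,
                                            __queue_async_bind))
                        ready = false;
        }
        reservation_object_add_excl_fence(vma->resv, vma->async.dma);
        i915_vma_unlock(vma);

        if (ready)
                __queue_async_bind(vma->async.dma, &vma->async.cb);

        return 0;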