Quoting Matthew Auld (2019-06-10 18:34:52)
> On Mon, 10 Jun 2019 at 08:21, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
> >
> > If we let pages be allocated asynchronously, we also then want to push
> > the binding process into an asynchronous task. Make it so, utilising the
> > recent improvements to fence error tracking and struct_mutex reduction.
> >
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>
> [snip]
>
> > +static int queue_async_bind(struct i915_vma *vma,
> > +                            enum i915_cache_level cache_level,
> > +                            u32 flags)
> > +{
> > +       bool ready = true;
> > +
> > +       /* We are not allowed to shrink inside vm->mutex! */
> > +       vma->async.dma = kmalloc(sizeof(*vma->async.dma),
> > +                                GFP_NOWAIT | __GFP_NOWARN);
> > +       if (!vma->async.dma)
> > +               return -ENOMEM;
> > +
> > +       dma_fence_init(vma->async.dma,
> > +                      &async_bind_ops,
> > +                      &async_lock,
> > +                      vma->vm->i915->mm.unordered_timeline,
> > +                      0);
> > +
> > +       /* XXX find and avoid allocations under reservation_object locks */
> > +       if (!i915_vma_trylock(vma)) {
> > +               kfree(fetch_and_zero(&vma->async.dma));
> > +               return -EAGAIN;
> > +       }
> > +
> > +       if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
> > +               struct dma_fence *f = reservation_object_get_excl(vma->resv);
> > +
> > +               if (!dma_fence_add_callback(f,
> > +                                           &vma->async.cb,
> > +                                           __queue_async_bind))
> > +                       ready = false;
> > +       }
> > +       reservation_object_add_excl_fence(vma->resv, vma->async.dma);
> > +       i915_vma_unlock(vma);
> > +
> > +       i915_vm_get(vma->vm);
> > +       i915_vma_get(vma);
> > +       __i915_vma_pin(vma); /* avoid being shrunk */
> > +
> > +       vma->async.cache_level = cache_level;
> > +       vma->async.flags = flags;
>
> Do we need to move this stuff to before the add_callback?

Yes. I'm used to having an i915_sw_fence_commit to have everything in
place before I let go :(
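
Something along these lines, perhaps (a rough, untested sketch of the
tail of queue_async_bind(), reusing only names from the snippet above;
it is meant to illustrate the reordering, not to be the final fix):

        /*
         * Sketch only: take the references and publish the async state
         * before dma_fence_add_callback(), so that __queue_async_bind()
         * can never run against a half-initialised vma if the pages
         * fence signals immediately.
         */
        i915_vm_get(vma->vm);
        i915_vma_get(vma);
        __i915_vma_pin(vma); /* avoid being shrunk */

        vma->async.cache_level = cache_level;
        vma->async.flags = flags;

        if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
                struct dma_fence *f = reservation_object_get_excl(vma->resv);

                if (!dma_fence_add_callback(f,
                                            &vma->async.cb,
                                            __queue_async_bind))
                        ready = false;
        }
        reservation_object_add_excl_fence(vma->resv, vma->async.dma);
        i915_vma_unlock(vma);

        if (ready)
                __queue_async_bind(vma->async.dma, &vma->async.cb);

        return 0;
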
> > +       if (ready)
> > +               __queue_async_bind(vma->async.dma, &vma->async.cb);
> > +
> > +       return 0;
> > +}
> > +
> >  /**
> >   * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
> >   * @vma: VMA to map
> > @@ -293,17 +405,12 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> >         u32 vma_flags;
> >         int ret;
> >
> > +       GEM_BUG_ON(!flags);
> >         GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> >         GEM_BUG_ON(vma->size > vma->node.size);
> > -
> > -       if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
> > -                                             vma->node.size,
> > -                                             vma->vm->total)))
> > -               return -ENODEV;
> > -
> > -       if (GEM_DEBUG_WARN_ON(!flags))
> > -               return -EINVAL;
> > -
> > +       GEM_BUG_ON(range_overflows(vma->node.start,
> > +                                  vma->node.size,
> > +                                  vma->vm->total));
> >
> >         bind_flags = 0;
> >         if (flags & PIN_GLOBAL)
> >                 bind_flags |= I915_VMA_GLOBAL_BIND;
> > @@ -318,14 +425,18 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>
> Are we aiming for vma_bind/vma_bind_async/vma_pin/vma_pin_async etc?

At the moment, it's a pretty straightforward split between global being
synchronous and user being asynchronous. Now, user should always remain
async, if only because we have to play games with avoiding allocations
underneath vm->mutex; but we may want to make global optionally async
(if we have some caller that cares).

The implication of the question is that you would rather it be a flag
upfront so that we don't have to retrofit one later on :)

> >         if (bind_flags == 0)
> >                 return 0;
> >
> > -       GEM_BUG_ON(!vma->pages);
> > +       if ((bind_flags & ~vma_flags) & I915_VMA_LOCAL_BIND)
> > +               bind_flags |= I915_VMA_ALLOC_BIND;
> >
> >         trace_i915_vma_bind(vma, bind_flags);
> > -       ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
> > +       if (bind_flags & I915_VMA_ALLOC_BIND)
> > +               ret = queue_async_bind(vma, cache_level, bind_flags);
> > +       else
> > +               ret = __vma_bind(vma, cache_level, bind_flags);
> >         if (ret)
> >                 return ret;
>
> Looks like clear_pages() is called unconditionally in vma_remove() if
> we error out here, even though that is now set as part of the worker?

Right, I moved the ->set_pages; I should move the ->clear_pages as well.

> > -       vma->flags |= bind_flags;
> > +       vma->flags |= bind_flags & ~I915_VMA_ALLOC_BIND;
> >         return 0;
> >  }
> >
> > @@ -569,7 +680,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >         }
> >
> >         if (vma->obj) {
> > -               ret = i915_gem_object_pin_pages(vma->obj);
> > +               ret = i915_gem_object_pin_pages_async(vma->obj);
> >                 if (ret)
> >                         return ret;
> >
> > @@ -578,25 +689,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >                 cache_level = 0;
> >         }
> >
> > -       GEM_BUG_ON(vma->pages);
> > -
> > -       ret = vma->ops->set_pages(vma);
> > -       if (ret)
> > -               goto err_unpin;
> > -
>
> I guess one casualty here is the !offset_fixed path for
> huge-gtt-pages, since we need to inspect the vma->page_sizes below.
> Though probably not the end of the world if that's the case.

Whoops. That's a nuisance, as there is no way we can know at this stage,
so we'll just have to assume the worst.
-Chris