On Thu, 28 Nov 2019 at 22:42, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
>
> Avoid dangerous race handling of destroyed vma by unbinding all vma
> instead. Unfortunately, this stops us from trying to be clever and only
> doing the minimal change required, so on first use of scanout we may
> encounter an annoying stall as it transitions to a new cache level.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112413
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 124 ++-------------------
>  1 file changed, 7 insertions(+), 117 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 9aebcf263191..bd846b4fec20 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -192,126 +192,16 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  	if (obj->cache_level == cache_level)
>  		return 0;
>
> -	/* Inspect the list of currently bound VMA and unbind any that would
> -	 * be invalid given the new cache-level. This is principally to
> -	 * catch the issue of the CS prefetch crossing page boundaries and
> -	 * reading an invalid PTE on older architectures.
> -	 */
> -restart:
> -	list_for_each_entry(vma, &obj->vma.list, obj_link) {
> -		if (!drm_mm_node_allocated(&vma->node))
> -			continue;
> -
> -		if (i915_vma_is_pinned(vma)) {
> -			DRM_DEBUG("can not change the cache level of pinned objects\n");
> -			return -EBUSY;
> -		}
> -
> -		if (!i915_vma_is_closed(vma) &&
> -		    i915_gem_valid_gtt_space(vma, cache_level))
> -			continue;
> -
> -		ret = i915_vma_unbind(vma);
> -		if (ret)
> -			return ret;
> -
> -		/* As unbinding may affect other elements in the
> -		 * obj->vma_list (due to side-effects from retiring
> -		 * an active vma), play safe and restart the iterator.
> -		 */
> -		goto restart;
> -	}
> -
> -	/* We can reuse the existing drm_mm nodes but need to change the
> -	 * cache-level on the PTE. We could simply unbind them all and
> -	 * rebind with the correct cache-level on next use. However since
> -	 * we already have a valid slot, dma mapping, pages etc, we may as
> -	 * rewrite the PTE in the belief that doing so tramples upon less
> -	 * state and so involves less work.
> -	 */
> -	if (atomic_read(&obj->bind_count)) {
> -		struct drm_i915_private *i915 = to_i915(obj->base.dev);
> -
> -		/* Before we change the PTE, the GPU must not be accessing it.
> -		 * If we wait upon the object, we know that all the bound
> -		 * VMA are no longer active.
> -		 */
> -		ret = i915_gem_object_wait(obj,
> -					   I915_WAIT_INTERRUPTIBLE |
> -					   I915_WAIT_ALL,
> -					   MAX_SCHEDULE_TIMEOUT);
> -		if (ret)
> -			return ret;
> -
> -		if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
> -			intel_wakeref_t wakeref =
> -				intel_runtime_pm_get(&i915->runtime_pm);
> -
> -			/*
> -			 * Access to snoopable pages through the GTT is
> -			 * incoherent and on some machines causes a hard
> -			 * lockup. Relinquish the CPU mmaping to force
> -			 * userspace to refault in the pages and we can
> -			 * then double check if the GTT mapping is still
> -			 * valid for that pointer access.
> -			 */
> -			ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
> -			if (ret) {
> -				intel_runtime_pm_put(&i915->runtime_pm,
> -						     wakeref);
> -				return ret;
> -			}
> -
> -			if (obj->userfault_count)
> -				__i915_gem_object_release_mmap(obj);
> -
> -			/*
> -			 * As we no longer need a fence for GTT access,
> -			 * we can relinquish it now (and so prevent having
> -			 * to steal a fence from someone else on the next
> -			 * fence request). Note GPU activity would have
> -			 * dropped the fence as all snoopable access is
> -			 * supposed to be linear.
> -			 */
> -			for_each_ggtt_vma(vma, obj) {
> -				ret = i915_vma_revoke_fence(vma);
> -				if (ret)
> -					break;
> -			}
> -			mutex_unlock(&i915->ggtt.vm.mutex);
> -			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -			if (ret)
> -				return ret;
> -		} else {
> -			/*
> -			 * We either have incoherent backing store and
> -			 * so no GTT access or the architecture is fully
> -			 * coherent. In such cases, existing GTT mmaps
> -			 * ignore the cache bit in the PTE and we can
> -			 * rewrite it without confusing the GPU or having
> -			 * to force userspace to fault back in its mmaps.
> -			 */
> -		}
> -
> -		list_for_each_entry(vma, &obj->vma.list, obj_link) {
> -			if (!drm_mm_node_allocated(&vma->node))
> -				continue;
> -
> -			/* Wait for an earlier async bind, need to rewrite it */
> -			ret = i915_vma_sync(vma);
> -			if (ret)
> -				return ret;
> -
> -			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL);
> -			if (ret)
> -				return ret;
> -		}
> -	}
> +	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
> +	if (ret)
> +		return ret;
>
> -	list_for_each_entry(vma, &obj->vma.list, obj_link) {
> +	spin_lock(&obj->vma.lock);
> +	list_for_each_entry(vma, &obj->vma.list, obj_link)
>  		if (i915_vm_has_cache_coloring(vma->vm))
>  			vma->node.color = cache_level;
> -	}
> +	spin_unlock(&obj->vma.lock);

Do we still need to bother setting node.color here? If we unbind the
vma, the color will be set again when we re-bind it. And since we only
set the cache-level below, it seems kinda racy either way?

Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>

> +
>  	i915_gem_object_set_cache_coherency(obj, cache_level);
>  	obj->cache_dirty = true; /* Always invalidate stale cachelines */
>
> --
> 2.24.0
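For anyone skimming the thread: with this patch applied the whole
function collapses to roughly the following. This is just a sketch
reconstructed from the context and + lines of the hunk above; the
trailing return and the exact local declarations are assumed rather
than taken from the tree.

int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct i915_vma *vma;
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	/* Drop every binding, even active ones; the new cache level is
	 * applied when each vma is next bound, at the cost of a stall
	 * on first use (e.g. first scanout).
	 */
	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
	if (ret)
		return ret;

	/* Refresh the color on whatever nodes remain allocated. */
	spin_lock(&obj->vma.lock);
	list_for_each_entry(vma, &obj->vma.list, obj_link)
		if (i915_vm_has_cache_coloring(vma->vm))
			vma->node.color = cache_level;
	spin_unlock(&obj->vma.lock);

	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true; /* Always invalidate stale cachelines */

	return 0; /* assumed: not visible in the quoted hunk */
}

All the in-place PTE-rewrite special casing (the object wait, the
mmap/fence revocation under the ggtt mutex, the PIN_UPDATE rebind) is
gone in favour of a plain unbind, which is the simplification the
commit message is trading the first-use stall for.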