Use the per-object local lock to control the cache domain of the individual GEM objects, not struct_mutex. This is a huge leap forward for us in terms of object-level synchronisation; execbuffers are coordinated using the ww_mutex and pread/pwrite is finally fully serialised again. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 70 +++++----- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 125 +++++++++++------ drivers/gpu/drm/i915/gem/i915_gem_fence.c | 99 ++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 + drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 + drivers/gpu/drm/i915/gem/i915_gem_object.h | 14 ++ .../gpu/drm/i915/gem/selftests/huge_pages.c | 14 +- .../i915/gem/selftests/i915_gem_coherency.c | 12 ++ .../drm/i915/gem/selftests/i915_gem_context.c | 26 ++++ .../drm/i915/gem/selftests/i915_gem_mman.c | 6 + .../drm/i915/gem/selftests/i915_gem_phys.c | 4 +- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 + drivers/gpu/drm/i915/gvt/scheduler.c | 8 +- drivers/gpu/drm/i915/i915_cmd_parser.c | 23 ++-- drivers/gpu/drm/i915/i915_gem.c | 128 ++++++++++-------- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 + drivers/gpu/drm/i915/i915_perf.c | 4 +- drivers/gpu/drm/i915/i915_vma.c | 8 +- drivers/gpu/drm/i915/i915_vma.h | 12 ++ drivers/gpu/drm/i915/intel_display.c | 5 + drivers/gpu/drm/i915/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/intel_guc_log.c | 6 +- drivers/gpu/drm/i915/intel_lrc.c | 4 + drivers/gpu/drm/i915/intel_overlay.c | 25 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +- drivers/gpu/drm/i915/intel_uc_fw.c | 2 + .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_request.c | 8 ++ drivers/gpu/drm/i915/selftests/igt_spinner.c | 4 +- .../gpu/drm/i915/selftests/intel_hangcheck.c | 6 +- drivers/gpu/drm/i915/selftests/intel_lrc.c | 4 + .../drm/i915/selftests/intel_workarounds.c | 8 ++ 36 files changed, 482 insertions(+), 189 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_fence.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 84dfdf90e0e7..89fb4eaca4fb 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -80,6 +80,7 @@ i915-y += \ gem/i915_gem_dmabuf.o \ gem/i915_gem_domain.o \ gem/i915_gem_execbuffer.o \ + gem/i915_gem_fence.o \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ gem/i915_gem_mman.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 093bfff55a96..efab47250588 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -96,6 +96,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, { struct clflush *clflush; + assert_object_held(obj); + /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. @@ -145,9 +147,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, true, I915_FENCE_TIMEOUT, I915_FENCE_GFP); - reservation_object_lock(obj->resv, NULL); reservation_object_add_excl_fence(obj->resv, &clflush->dma); - reservation_object_unlock(obj->resv); i915_sw_fence_commit(&clflush->wait); } else if (obj->mm.pages) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index b5d86cfadd46..1585e54ef26b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -151,7 +151,6 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); int err; @@ -159,12 +158,12 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (err) return err; - err = i915_mutex_lock_interruptible(dev); + err = i915_gem_object_lock_interruptible(obj); if (err) goto out; err = i915_gem_object_set_to_cpu_domain(obj, write); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_unlock(obj); out: i915_gem_object_unpin_pages(obj); @@ -174,19 +173,18 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; int err; err = i915_gem_object_pin_pages(obj); if (err) return err; - err = i915_mutex_lock_interruptible(dev); + err = i915_gem_object_lock_interruptible(obj); if (err) goto out; err = i915_gem_object_set_to_gtt_domain(obj, false); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_unlock(obj); out: i915_gem_object_unpin_pages(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index c54e9e73212b..a0346bc483f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -31,9 +31,9 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) if (!READ_ONCE(obj->pin_global)) return; - mutex_lock(&obj->base.dev->struct_mutex); + i915_gem_object_lock(obj); __i915_gem_object_flush_for_display(obj); - mutex_unlock(&obj->base.dev->struct_mutex); + i915_gem_object_unlock(obj); } /** @@ -49,11 +49,10 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) { int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | (write ? I915_WAIT_ALL : 0), MAX_SCHEDULE_TIMEOUT); if (ret) @@ -111,11 +110,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | (write ? I915_WAIT_ALL : 0), MAX_SCHEDULE_TIMEOUT); if (ret) @@ -181,7 +179,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); + assert_object_held(obj); if (obj->cache_level == cache_level) return 0; @@ -230,7 +228,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, */ ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT); if (ret) @@ -374,12 +371,16 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto out; - ret = i915_mutex_lock_interruptible(dev); + ret = mutex_lock_interruptible(&i915->drm.struct_mutex); if (ret) goto out; - ret = i915_gem_object_set_cache_level(obj, level); - mutex_unlock(&dev->struct_mutex); + ret = i915_gem_object_lock_interruptible(obj); + if (ret == 0) { + ret = i915_gem_object_set_cache_level(obj, level); + i915_gem_object_unlock(obj); + } + mutex_unlock(&i915->drm.struct_mutex); out: i915_gem_object_put(obj); @@ -401,7 +402,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); + assert_object_held(obj); /* Mark the global pin early so that we account for the * display coherency whilst setting up the cache domains. @@ -486,16 +487,18 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + struct drm_i915_gem_object *obj = vma->obj; + + assert_object_held(obj); - if (WARN_ON(vma->obj->pin_global == 0)) + if (WARN_ON(obj->pin_global == 0)) return; - if (--vma->obj->pin_global == 0) + if (--obj->pin_global == 0) vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ - i915_gem_object_bump_inactive_ggtt(vma->obj); + i915_gem_object_bump_inactive_ggtt(obj); i915_vma_unpin(vma); } @@ -513,11 +516,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | (write ? I915_WAIT_ALL : 0), MAX_SCHEDULE_TIMEOUT); if (ret) @@ -619,7 +621,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; - err = i915_mutex_lock_interruptible(dev); + err = i915_gem_object_lock_interruptible(obj); if (err) goto out_unpin; @@ -633,7 +635,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, /* And bump the LRU for this access */ i915_gem_object_bump_inactive_ggtt(obj); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_unlock(obj); if (write_domain != 0) intel_fb_obj_invalidate(obj, @@ -656,22 +658,23 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, { int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); - *needs_clflush = 0; if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; + ret = i915_gem_object_lock_interruptible(obj); + if (ret) + return ret; + ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, + I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (ret) - return ret; + goto err_unlock; ret = i915_gem_object_pin_pages(obj); if (ret) - return ret; + goto err_unlock; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -699,6 +702,8 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, err_unpin: i915_gem_object_unpin_pages(obj); +err_unlock: + i915_gem_object_unlock(obj); return ret; } @@ -707,23 +712,24 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, { int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); - *needs_clflush = 0; if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; + ret = i915_gem_object_lock_interruptible(obj); + if (ret) + return ret; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT); if (ret) - return ret; + goto err_unlock; ret = i915_gem_object_pin_pages(obj); if (ret) - return ret; + goto err_unlock; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -760,5 +766,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, err_unpin: i915_gem_object_unpin_pages(obj); +err_unlock: + i915_gem_object_unlock(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c4f05f60ec57..900417510fe0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1074,7 +1074,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); if (err) return ERR_PTR(err); @@ -1163,6 +1165,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } +static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + int err; + + i915_vma_lock(vma); + + if (obj->cache_dirty & ~obj->cache_coherent) + i915_gem_clflush_object(obj, 0); + obj->write_domain = 0; + + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + + i915_vma_unlock(vma); + + return err; +} + static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -1174,15 +1196,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, u32 *cmd; int err; - if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) { - obj = vma->obj; - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - obj->write_domain = 0; - } - - GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU); - obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1195,7 +1208,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (IS_ERR(cmd)) return PTR_ERR(cmd); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_wc_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err_unmap; @@ -1215,7 +1230,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_unpin; } - err = i915_request_await_object(rq, vma->obj, true); + err = reloc_move_to_gpu(rq, vma); if (err) goto err_request; @@ -1223,14 +1238,12 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, batch->node.start, PAGE_SIZE, cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); if (err) - goto err_request; + goto skip_request; + i915_vma_lock(batch); GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(batch); if (err) goto skip_request; @@ -1839,24 +1852,59 @@ static int eb_relocate(struct i915_execbuffer *eb) static int eb_move_to_gpu(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; + struct ww_acquire_ctx acquire; unsigned int i; - int err; + int err = 0; + + ww_acquire_init(&acquire, &reservation_ww_class); for (i = 0; i < count; i++) { + struct i915_vma *vma = eb->vma[i]; + + err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); + if (!err) + continue; + + GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */ + + if (err == -EDEADLK) { + GEM_BUG_ON(i == 0); + do { + int j = i - 1; + + ww_mutex_unlock(&eb->vma[j]->resv->lock); + + swap(eb->flags[i], eb->flags[j]); + swap(eb->vma[i], eb->vma[j]); + eb->vma[i]->exec_flags = &eb->flags[i]; + } while (--i); + GEM_BUG_ON(vma != eb->vma[0]); + vma->exec_flags = &eb->flags[0]; + + err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, + &acquire); + } + if (err) + break; + } + ww_acquire_done(&acquire); + + while (i--) { unsigned int flags = eb->flags[i]; struct i915_vma *vma = eb->vma[i]; struct drm_i915_gem_object *obj = vma->obj; + assert_vma_held(vma); + if (flags & EXEC_OBJECT_CAPTURE) { struct i915_capture_list *capture; capture = kmalloc(sizeof(*capture), GFP_KERNEL); - if (unlikely(!capture)) - return -ENOMEM; - - capture->next = eb->request->capture_list; - capture->vma = eb->vma[i]; - eb->request->capture_list = capture; + if (capture) { + capture->next = eb->request->capture_list; + capture->vma = vma; + eb->request->capture_list = capture; + } } /* @@ -1876,24 +1924,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) flags &= ~EXEC_OBJECT_ASYNC; } - if (flags & EXEC_OBJECT_ASYNC) - continue; - - err = i915_request_await_object - (eb->request, obj, flags & EXEC_OBJECT_WRITE); - if (err) - return err; - } + if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) { + err = i915_request_await_object + (eb->request, obj, flags & EXEC_OBJECT_WRITE); + } - for (i = 0; i < count; i++) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; + if (err == 0) + err = i915_vma_move_to_active(vma, eb->request, flags); - err = i915_vma_move_to_active(vma, eb->request, flags); - if (unlikely(err)) { - i915_request_skip(eb->request, err); - return err; - } + i915_vma_unlock(vma); __eb_unreserve_vma(vma, flags); vma->exec_flags = NULL; @@ -1901,12 +1940,20 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (unlikely(flags & __EXEC_OBJECT_HAS_REF)) i915_vma_put(vma); } + ww_acquire_fini(&acquire); + + if (unlikely(err)) + goto err_skip; + eb->exec = NULL; /* Unconditionally flush any chipset caches (for streaming writes). */ i915_gem_chipset_flush(eb->i915); - return 0; + +err_skip: + i915_request_skip(eb->request, err); + return err; } static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c new file mode 100644 index 000000000000..021425f30a66 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_gem_object.h" + +#include "../i915_drv.h" + +static DEFINE_SPINLOCK(fence_lock); + +struct stub_fence { + struct dma_fence dma; + struct i915_sw_fence chain; +}; + +static int __i915_sw_fence_call +stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct stub_fence *stub = container_of(fence, typeof(*stub), chain); + + switch (state) { + case FENCE_COMPLETE: + dma_fence_signal(&stub->dma); + break; + + case FENCE_FREE: + dma_fence_put(&stub->dma); + break; + } + + return NOTIFY_DONE; +} + +static const char *stub_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *stub_timeline_name(struct dma_fence *fence) +{ + return "object"; +} + +static void stub_release(struct dma_fence *fence) +{ + struct stub_fence *stub = container_of(fence, typeof(*stub), dma); + + i915_sw_fence_fini(&stub->chain); + + BUILD_BUG_ON(offsetof(typeof(*stub), dma)); + dma_fence_free(&stub->dma); +} + +static const struct dma_fence_ops stub_fence_ops = { + .get_driver_name = stub_driver_name, + .get_timeline_name = stub_timeline_name, + .release = stub_release, +}; + +struct dma_fence * +i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) +{ + struct stub_fence *stub; + + assert_object_held(obj); + + stub = kmalloc(sizeof(*stub), GFP_KERNEL); + if (!stub) + return NULL; + + dma_fence_init(&stub->dma, &stub_fence_ops, &fence_lock, + to_i915(obj->base.dev)->mm.unordered_timeline, + 0); + i915_sw_fence_init(&stub->chain, stub_notify); + + if (i915_sw_fence_await_reservation(&stub->chain, + obj->resv, NULL, + true, I915_FENCE_TIMEOUT, + I915_FENCE_GFP) < 0) + goto err; + + reservation_object_add_excl_fence(obj->resv, &stub->dma); + + return &stub->dma; + +err: + stub_release(&stub->dma); + return NULL; +} + +void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence) +{ + struct stub_fence *stub = container_of(fence, typeof(*stub), dma); + + i915_sw_fence_commit(&stub->chain); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 92a2b9cd879c..83baecb5baf0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -300,7 +300,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) goto err_unlock; } + i915_gem_object_lock(obj); ret = i915_gem_object_set_to_gtt_domain(obj, write); + i915_gem_object_unlock(obj); if (ret) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index be5952aa4c11..ebed1898a6cc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -379,6 +379,8 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; + assert_object_held(obj); + if (!(obj->write_domain & flush_domains)) return; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 1c820ddc5ab6..08016f1c9505 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -99,16 +99,29 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) __drm_gem_object_put(&obj->base); } +#define assert_object_held(obj) reservation_object_assert_held((obj)->resv) + static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) { reservation_object_lock(obj->resv, NULL); } +static inline int +i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) +{ + return reservation_object_lock_interruptible(obj->resv, NULL); +} + static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) { reservation_object_unlock(obj->resv); } +struct dma_fence * +i915_gem_object_lock_fence(struct drm_i915_gem_object *obj); +void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence); + static inline void i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) { @@ -358,6 +371,7 @@ static inline void i915_gem_object_finish_access(struct drm_i915_gem_object *obj) { i915_gem_object_unpin_pages(obj); + i915_gem_object_unlock(obj); } static inline struct intel_engine_cs * diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 5d1d76957145..189fdb5aece8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -893,7 +893,9 @@ gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) if (IS_ERR(obj)) return ERR_CAST(obj); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_wc_domain(obj, true); + i915_gem_object_unlock(obj); if (err) goto err; @@ -961,10 +963,6 @@ static int gpu_write(struct i915_vma *vma, GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - err = i915_gem_object_set_to_gtt_domain(vma->obj, true); - if (err) - return err; - batch = gpu_write_dw(vma, dword * sizeof(u32), value); if (IS_ERR(batch)) return PTR_ERR(batch); @@ -975,13 +973,19 @@ static int gpu_write(struct i915_vma *vma, goto err_batch; } + i915_vma_lock(batch); err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); if (err) goto err_request; i915_gem_object_set_active_reference(batch->obj); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_lock(vma); + err = i915_gem_object_set_to_gtt_domain(vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); if (err) goto err_request; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 4558d4828d61..aebc79500ca1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -78,7 +78,9 @@ static int gtt_set(struct drm_i915_gem_object *obj, u32 __iomem *map; int err; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); if (err) return err; @@ -105,7 +107,9 @@ static int gtt_get(struct drm_i915_gem_object *obj, u32 __iomem *map; int err; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) return err; @@ -131,7 +135,9 @@ static int wc_set(struct drm_i915_gem_object *obj, u32 *map; int err; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_wc_domain(obj, true); + i915_gem_object_unlock(obj); if (err) return err; @@ -152,7 +158,9 @@ static int wc_get(struct drm_i915_gem_object *obj, u32 *map; int err; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_wc_domain(obj, false); + i915_gem_object_unlock(obj); if (err) return err; @@ -176,7 +184,9 @@ static int gpu_set(struct drm_i915_gem_object *obj, u32 *cs; int err; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); if (err) return err; @@ -215,7 +225,9 @@ static int gpu_set(struct drm_i915_gem_object *obj, } intel_ring_advance(rq, cs); + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); i915_vma_unpin(vma); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index b4eaf761b0f6..9be75739c9fa 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -205,7 +205,9 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) *cmd = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(obj); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err; @@ -257,7 +259,9 @@ static int gpu_fill(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) return err; @@ -298,11 +302,15 @@ static int gpu_fill(struct drm_i915_gem_object *obj, if (err) goto err_request; + i915_vma_lock(batch); err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); if (err) goto skip_request; + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); if (err) goto skip_request; @@ -717,7 +725,9 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) i915_gem_object_unpin_map(obj); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err; @@ -755,7 +765,9 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) return err; @@ -779,11 +791,15 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto err_request; + i915_vma_lock(batch); err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); if (err) goto skip_request; + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); if (err) goto skip_request; @@ -1316,7 +1332,9 @@ static int write_to_scratch(struct i915_gem_context *ctx, *cmd = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(obj); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err; @@ -1344,7 +1362,9 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto err_request; + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); if (err) goto skip_request; @@ -1413,7 +1433,9 @@ static int read_from_scratch(struct i915_gem_context *ctx, *cmd = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(obj); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err; @@ -1441,7 +1463,9 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto err_request; + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); if (err) goto skip_request; @@ -1450,7 +1474,9 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_request_add(rq); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_cpu_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index f0e7b0e517cf..062c6bdea3d8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -109,7 +109,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, GEM_BUG_ON(view.partial.size > nreal); cond_resched(); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); if (err) { pr_err("Failed to flush to GTT write domain; err=%d\n", err); @@ -141,7 +143,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; + i915_gem_object_lock(obj); i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + i915_gem_object_unlock(obj); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -343,7 +347,9 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) return PTR_ERR(rq); } + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index b76b503b3999..8ecf8d7bd083 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -46,9 +46,9 @@ static int mock_phys_object(void *arg) } /* Make the object dirty so that put_pages must do copy back the data */ - mutex_lock(&i915->drm.struct_mutex); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unlock(obj); if (err) { pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", err); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 7cd737e513a0..2754a65aedf9 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2813,7 +2813,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) goto put_obj; } + i915_gem_object_lock(obj); ret = i915_gem_object_set_to_cpu_domain(obj, false); + i915_gem_object_unlock(obj); if (ret) { gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n"); goto unmap_src; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 62dd3a5e8413..d6b9b8b46f17 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -517,18 +517,18 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) } ret = i915_gem_object_set_to_gtt_domain(bb->obj, - false); + false); if (ret) goto err; - i915_gem_object_finish_access(bb->obj); - bb->accessing = false; - ret = i915_vma_move_to_active(bb->vma, workload->req, 0); if (ret) goto err; + + i915_gem_object_finish_access(bb->obj); + bb->accessing = false; } } return 0; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index acc7487cd85d..a710211300ce 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1057,19 +1057,20 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, void *dst, *src; int ret; - ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); if (ret) return ERR_PTR(ret); - ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); - if (ret) { - dst = ERR_PTR(ret); - goto unpin_src; - } - dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB); + i915_gem_object_finish_access(dst_obj); if (IS_ERR(dst)) - goto unpin_dst; + return dst; + + ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + if (ret) { + i915_gem_object_unpin_map(dst_obj); + return ERR_PTR(ret); + } src = ERR_PTR(-ENODEV); if (src_needs_clflush && @@ -1115,13 +1116,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } + i915_gem_object_finish_access(src_obj); + /* dst_obj is returned with vmap pinned */ *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; -unpin_dst: - i915_gem_object_finish_access(dst_obj); -unpin_src: - i915_gem_object_finish_access(src_obj); return dst; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 16e568d44f4c..a36162ba6a0e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -203,15 +203,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) lockdep_assert_held(&obj->base.dev->struct_mutex); - /* Closed vma are removed from the obj->vma_list - but they may - * still have an active binding on the object. To remove those we - * must wait for all rendering to complete to the object (as unbinding - * must anyway), and retire the requests. - */ - ret = i915_gem_object_set_to_cpu_domain(obj, false); - if (ret) - return ret; - spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, @@ -234,29 +225,17 @@ i915_gem_object_wait_fence(struct dma_fence *fence, unsigned int flags, long timeout) { - struct i915_request *rq; - BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return timeout; - if (!dma_fence_is_i915(fence)) - return dma_fence_wait_timeout(fence, - flags & I915_WAIT_INTERRUPTIBLE, - timeout); - - rq = to_request(fence); - if (i915_request_completed(rq)) - goto out; - - timeout = i915_request_wait(rq, flags, timeout); + if (dma_fence_is_i915(fence)) + return i915_request_wait(to_request(fence), flags, timeout); -out: - if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) - i915_request_retire_upto(rq); - - return timeout; + return dma_fence_wait_timeout(fence, + flags & I915_WAIT_INTERRUPTIBLE, + timeout); } static long @@ -557,21 +536,22 @@ static int i915_gem_shmem_pread(struct drm_i915_gem_object *obj, struct drm_i915_gem_pread *args) { - char __user *user_data; - u64 remain; unsigned int needs_clflush; unsigned int idx, offset; + struct dma_fence *fence; + char __user *user_data; + u64 remain; int ret; - ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); - if (ret) - return ret; - ret = i915_gem_object_prepare_read(obj, &needs_clflush); - mutex_unlock(&obj->base.dev->struct_mutex); if (ret) return ret; + fence = i915_gem_object_lock_fence(obj); + i915_gem_object_finish_access(obj); + if (!fence) + return -ENOMEM; + remain = args->size; user_data = u64_to_user_ptr(args->data_ptr); offset = offset_in_page(args->offset); @@ -589,7 +569,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, offset = 0; } - i915_gem_object_finish_access(obj); + i915_gem_object_unlock_fence(obj, fence); return ret; } @@ -625,8 +605,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &i915->ggtt; intel_wakeref_t wakeref; struct drm_mm_node node; - struct i915_vma *vma; + struct dma_fence *fence; void __user *user_data; + struct i915_vma *vma; u64 remain, offset; int ret; @@ -655,11 +636,25 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, GEM_BUG_ON(!node.allocated); } - ret = i915_gem_object_set_to_gtt_domain(obj, false); + mutex_unlock(&i915->drm.struct_mutex); + + ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; - mutex_unlock(&i915->drm.struct_mutex); + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) { + i915_gem_object_unlock(obj); + goto out_unpin; + } + + fence = i915_gem_object_lock_fence(obj); + i915_gem_object_unlock(obj); + if (!fence) { + ret = -ENOMEM; + goto out_unpin; + } + user_data = u64_to_user_ptr(args->data_ptr); remain = args->size; @@ -697,8 +692,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, offset += page_length; } - mutex_lock(&i915->drm.struct_mutex); + i915_gem_object_unlock_fence(obj, fence); out_unpin: + mutex_lock(&i915->drm.struct_mutex); if (node.allocated) { wmb(); ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); @@ -809,6 +805,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &i915->ggtt; intel_wakeref_t wakeref; struct drm_mm_node node; + struct dma_fence *fence; struct i915_vma *vma; u64 remain, offset; void __user *user_data; @@ -856,11 +853,24 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, GEM_BUG_ON(!node.allocated); } - ret = i915_gem_object_set_to_gtt_domain(obj, true); + mutex_unlock(&i915->drm.struct_mutex); + + ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; - mutex_unlock(&i915->drm.struct_mutex); + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) { + i915_gem_object_unlock(obj); + goto out_unpin; + } + + fence = i915_gem_object_lock_fence(obj); + i915_gem_object_unlock(obj); + if (!fence) { + ret = -ENOMEM; + goto out_unpin; + } intel_fb_obj_invalidate(obj, ORIGIN_CPU); @@ -905,8 +915,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, } intel_fb_obj_flush(obj, ORIGIN_CPU); - mutex_lock(&i915->drm.struct_mutex); + i915_gem_object_unlock_fence(obj, fence); out_unpin: + mutex_lock(&i915->drm.struct_mutex); if (node.allocated) { wmb(); ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); @@ -952,23 +963,23 @@ static int i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *args) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - void __user *user_data; - u64 remain; unsigned int partial_cacheline_write; unsigned int needs_clflush; unsigned int offset, idx; + struct dma_fence *fence; + void __user *user_data; + u64 remain; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - ret = i915_gem_object_prepare_write(obj, &needs_clflush); - mutex_unlock(&i915->drm.struct_mutex); if (ret) return ret; + fence = i915_gem_object_lock_fence(obj); + i915_gem_object_finish_access(obj); + if (!fence) + return -ENOMEM; + /* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire patch. @@ -996,7 +1007,8 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, } intel_fb_obj_flush(obj, ORIGIN_CPU); - i915_gem_object_finish_access(obj); + i915_gem_object_unlock_fence(obj, fence); + return ret; } @@ -1882,12 +1894,13 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ - mutex_lock(&i915->drm.struct_mutex); for (phase = phases; *phase; phase++) { - list_for_each_entry(obj, *phase, mm.link) + list_for_each_entry(obj, *phase, mm.link) { + i915_gem_object_lock(obj); WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + i915_gem_object_unlock(obj); + } } - mutex_unlock(&i915->drm.struct_mutex); intel_uc_sanitize(i915); i915_gem_sanitize(i915); @@ -2149,7 +2162,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (err) goto err_active; + i915_gem_object_lock(state->obj); err = i915_gem_object_set_to_cpu_domain(state->obj, false); + i915_gem_object_unlock(state->obj); if (err) goto err_active; @@ -2582,12 +2597,13 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); for (phase = phases; *phase; phase++) { - list_for_each_entry(obj, *phase, mm.link) + list_for_each_entry(obj, *phase, mm.link) { + i915_gem_object_lock(obj); WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); + i915_gem_object_unlock(obj); + } } - mutex_unlock(&i915->drm.struct_mutex); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 123b9225499e..913a68f6b07f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3548,8 +3548,11 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, PIN_UPDATE)); - if (obj) + if (obj) { + i915_gem_object_lock(obj); WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + i915_gem_object_unlock(obj); + } lock: mutex_lock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 056d2ad9a6d6..a608ddf74b5c 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -222,7 +222,9 @@ int i915_gem_render_state_emit(struct i915_request *rq) goto err_unpin; } + i915_vma_lock(so.vma); err = i915_vma_move_to_active(so.vma, rq, 0); + i915_vma_unlock(so.vma); err_unpin: i915_vma_unpin(so.vma); err_vma: diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 07726165c2f9..fc7fa4238046 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1512,9 +1512,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) goto unlock; } - ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); - if (ret) - goto err_unref; + i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); /* PreHSW required 512K alignment, HSW requires 16M */ vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 36726392e737..51a8d2b0d7b3 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -846,13 +846,14 @@ void i915_vma_destroy(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON(i915_vma_is_active(vma)); GEM_BUG_ON(i915_vma_is_pinned(vma)); if (i915_vma_is_closed(vma)) list_del(&vma->closed_link); WARN_ON(i915_vma_unbind(vma)); + GEM_BUG_ON(i915_vma_is_active(vma)); + __i915_vma_destroy(vma); } @@ -914,12 +915,10 @@ static void export_fence(struct i915_vma *vma, * handle an error right now. Worst case should be missed * synchronisation leading to rendering corruption. */ - reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) reservation_object_add_excl_fence(resv, &rq->fence); else if (reservation_object_reserve_shared(resv, 1) == 0) reservation_object_add_shared_fence(resv, &rq->fence); - reservation_object_unlock(resv); } int i915_vma_move_to_active(struct i915_vma *vma, @@ -928,7 +927,8 @@ int i915_vma_move_to_active(struct i915_vma *vma, { struct drm_i915_gem_object *obj = vma->obj; - lockdep_assert_held(&rq->i915->drm.struct_mutex); + assert_vma_held(vma); + assert_object_held(obj); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); /* diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 7950fa96ee86..71ac7ee8620a 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -295,6 +295,18 @@ void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); void i915_vma_destroy(struct i915_vma *vma); +#define assert_vma_held(vma) reservation_object_assert_held((vma)->resv) + +static inline void i915_vma_lock(struct i915_vma *vma) +{ + reservation_object_lock(vma->resv, NULL); +} + +static inline void i915_vma_unlock(struct i915_vma *vma) +{ + reservation_object_unlock(vma->resv); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); static inline int __must_check diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2d5f72d2fce2..463dcd87e932 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2056,6 +2056,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, * pin/unpin/fence and not more. */ wakeref = intel_runtime_pm_get(dev_priv); + i915_gem_object_lock(obj); atomic_inc(&dev_priv->gpu_error.pending_fb_pin); @@ -2110,6 +2111,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, err: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); + i915_gem_object_unlock(obj); intel_runtime_pm_put(dev_priv, wakeref); return vma; } @@ -2118,9 +2120,12 @@ void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + i915_gem_object_lock(vma->obj); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); + i915_gem_object_unlock(vma->obj); + i915_vma_put(vma); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 26a99f249bb7..381b52a26fe3 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -529,9 +529,7 @@ static int init_status_page(struct intel_engine_cs *engine) return PTR_ERR(obj); } - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - if (ret) - goto err; + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); if (IS_ERR(vma)) { diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 7146524264dd..67eadc82c396 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -343,8 +343,6 @@ static void capture_logs_work(struct work_struct *work) static int guc_log_map(struct intel_guc_log *log) { - struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *dev_priv = guc_to_i915(guc); void *vaddr; int ret; @@ -353,9 +351,9 @@ static int guc_log_map(struct intel_guc_log *log) if (!log->vma) return -ENODEV; - mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_object_lock(log->vma->obj); ret = i915_gem_object_set_to_wc_domain(log->vma->obj, true); - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_object_unlock(log->vma->obj); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 863dc1bd278e..b9417d031743 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1550,7 +1550,9 @@ static int __context_pin(struct i915_vma *vma) * on an active context (which by nature is already on the GPU). */ if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + i915_gem_object_lock(vma->obj); err = i915_gem_object_set_to_wc_domain(vma->obj, true); + i915_gem_object_unlock(vma->obj); if (err) return err; } @@ -3008,7 +3010,9 @@ populate_lr_context(struct intel_context *ce, u32 *regs; int ret; + i915_gem_object_lock(ctx_obj); ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); + i915_gem_object_unlock(ctx_obj); if (ret) { DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); return ret; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index a882b8d42bd9..0d3166b7e336 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -762,8 +762,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, atomic_inc(&dev_priv->gpu_error.pending_fb_pin); + i915_gem_object_lock(new_bo); vma = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, PIN_MAPPABLE); + i915_gem_object_unlock(new_bo); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_pin_section; @@ -1302,15 +1304,20 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, static int get_registers(struct intel_overlay *overlay, bool use_phys) { + struct drm_i915_private *i915 = overlay->i915; struct drm_i915_gem_object *obj; struct i915_vma *vma; int err; - obj = i915_gem_object_create_stolen(overlay->i915, PAGE_SIZE); + mutex_lock(&i915->drm.struct_mutex); + + obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); if (obj == NULL) - obj = i915_gem_object_create_internal(overlay->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_unlock; + } vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) { @@ -1331,10 +1338,13 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) } overlay->reg_bo = obj; + mutex_unlock(&i915->drm.struct_mutex); return 0; err_put_bo: i915_gem_object_put(obj); +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1360,18 +1370,16 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) INIT_ACTIVE_REQUEST(&overlay->last_flip); - mutex_lock(&dev_priv->drm.struct_mutex); - ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv)); if (ret) goto out_free; + i915_gem_object_lock(overlay->reg_bo); ret = i915_gem_object_set_to_gtt_domain(overlay->reg_bo, true); + i915_gem_object_unlock(overlay->reg_bo); if (ret) goto out_reg_bo; - mutex_unlock(&dev_priv->drm.struct_mutex); - memset_io(overlay->regs, 0, sizeof(struct overlay_registers)); update_polyphase_filter(overlay->regs); update_reg_attrs(overlay, overlay->regs); @@ -1383,7 +1391,6 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) out_reg_bo: i915_gem_object_put(overlay->reg_bo); out_free: - mutex_unlock(&dev_priv->drm.struct_mutex); kfree(overlay); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 200cec60e632..07212e955304 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1199,10 +1199,12 @@ int intel_ring_pin(struct intel_ring *ring) flags |= PIN_HIGH; if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + i915_gem_object_lock(vma->obj); if (flags & PIN_MAPPABLE || map == I915_MAP_WC) ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); else ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); + i915_gem_object_unlock(vma->obj); if (unlikely(ret)) goto unpin_timeline; } @@ -1401,7 +1403,9 @@ static int __context_pin(struct intel_context *ce) * on an active context (which by nature is already on the GPU). */ if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + i915_gem_object_lock(vma->obj); err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + i915_gem_object_unlock(vma->obj); if (err) return err; } @@ -1486,10 +1490,8 @@ alloc_context_vma(struct intel_engine_cs *engine) * This is only applicable for Ivy Bridge devices since * later platforms don't have L3 control bits in the PTE. */ - if (IS_IVYBRIDGE(i915)) { - /* Ignore any error, regard it as a simple optimisation */ - i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); - } + if (IS_IVYBRIDGE(i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index e31fa3ac845d..f0b9b42da087 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -221,7 +221,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, intel_uc_fw_status_repr(uc_fw->load_status)); /* Pin object with firmware */ + i915_gem_object_lock(uc_fw->obj); err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false); + i915_gem_object_unlock(uc_fw->obj); if (err) { DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n", intel_uc_fw_type_repr(uc_fw->type), err); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 4c6900473060..753ae534df13 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -275,7 +275,7 @@ static int igt_evict_for_cache_color(void *arg) err = PTR_ERR(obj); goto cleanup; } - i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); quirk_add(obj, &objects); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -291,7 +291,7 @@ static int igt_evict_for_cache_color(void *arg) err = PTR_ERR(obj); goto cleanup; } - i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); quirk_add(obj, &objects); /* Neighbouring; same colour - should fit */ diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 72d9d22ce306..1162d37bc106 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -624,7 +624,9 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) i915_gem_object_unpin_map(obj); + i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err; @@ -778,7 +780,9 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (err) goto err; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_wc_domain(obj, true); + i915_gem_object_unlock(obj); if (err) goto err; @@ -878,7 +882,9 @@ static int live_all_engines(void *arg) i915_gem_object_set_active_reference(batch->obj); } + i915_vma_lock(batch); err = i915_vma_move_to_active(batch, request[id], 0); + i915_vma_unlock(batch); GEM_BUG_ON(err); i915_request_get(request[id]); @@ -993,7 +999,9 @@ static int live_sequential_engines(void *arg) GEM_BUG_ON(err); request[id]->batch = batch; + i915_vma_lock(batch); err = i915_vma_move_to_active(batch, request[id], 0); + i915_vma_unlock(batch); GEM_BUG_ON(err); i915_gem_object_set_active_reference(batch->obj); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index d0b93a3fbc54..d822072fe8df 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -29,7 +29,7 @@ int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915) goto err_hws; } - i915_gem_object_set_cache_level(spin->hws, I915_CACHE_LLC); + i915_gem_object_set_cache_coherency(spin->hws, I915_CACHE_LLC); vaddr = i915_gem_object_pin_map(spin->hws, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); @@ -74,7 +74,9 @@ static int move_to_active(struct i915_vma *vma, { int err; + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, flags); + i915_vma_unlock(vma); if (err) return err; diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index e174bfd73390..df1cb01ffb9c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -73,7 +73,7 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915) goto err_hws; } - i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC); + i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC); vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); @@ -114,7 +114,9 @@ static int move_to_active(struct i915_vma *vma, { int err; + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, flags); + i915_vma_unlock(vma); if (err) return err; @@ -1329,7 +1331,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, } } + i915_vma_lock(arg.vma); err = i915_vma_move_to_active(arg.vma, rq, flags); + i915_vma_unlock(arg.vma); if (flags & EXEC_OBJECT_NEEDS_FENCE) i915_vma_unpin_fence(arg.vma); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 22fed0a42766..57944543b1af 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -859,11 +859,13 @@ static int smoke_submit(struct preempt_smoke *smoke, } if (vma) { + i915_vma_lock(vma); err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); if (!err) err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); } i915_request_add(rq); @@ -1022,7 +1024,9 @@ static int live_preempt_smoke(void *arg) cs[n] = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(smoke.batch); + i915_gem_object_lock(smoke.batch); err = i915_gem_object_set_to_gtt_domain(smoke.batch, false); + i915_gem_object_unlock(smoke.batch); if (err) goto err_batch; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 803a01ad9fdc..c068d64543d9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -112,7 +112,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) goto err_pin; } + i915_vma_lock(vma); err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); if (err) goto err_req; @@ -189,8 +191,10 @@ static int check_whitelist(struct i915_gem_context *ctx, return PTR_ERR(results); err = 0; + i915_gem_object_lock(results); igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); + i915_gem_object_unlock(results); if (i915_terminally_wedged(ctx->i915)) err = -EIO; if (err) @@ -372,7 +376,9 @@ static struct i915_vma *create_scratch(struct i915_gem_context *ctx) if (err) goto err_obj; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_cpu_domain(obj, false); + i915_gem_object_unlock(obj); if (err) goto err_obj; @@ -403,7 +409,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx) if (err) goto err_obj; + i915_gem_object_lock(obj); err = i915_gem_object_set_to_wc_domain(obj, true); + i915_gem_object_unlock(obj); if (err) goto err_obj; -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx