With the nouveau calls fixed there were only 2 places left that used fence_lock without a reservation, those are fixed in this patch: ttm_bo_cleanup_refs_or_queue is fixed by simply doing things the other way around. ttm_bo_cleanup_refs is fixed by taking a reservation first, then a pointer to the fence object and backs off from the reservation if waiting has to be performed. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxx> --- This patch should be applied only after all the previous patches I sent are applied, since it depends on sync_obj_arg removal (kinda, probably fails to apply otherwise), uses ttm_bo_is_reserved, and depends on nouveau wait behavior being fixed. drivers/gpu/drm/nouveau/nouveau_bo.c | 4 - drivers/gpu/drm/nouveau/nouveau_gem.c | 15 +--- drivers/gpu/drm/radeon/radeon_object.c | 2 - drivers/gpu/drm/ttm/ttm_bo.c | 124 ++++++++++++-------------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 3 - drivers/gpu/drm/ttm/ttm_bo_vm.c | 5 - drivers/gpu/drm/ttm/ttm_execbuf_util.c | 2 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 3 - include/drm/ttm/ttm_bo_api.h | 12 +-- include/drm/ttm/ttm_bo_driver.h | 3 - 10 files changed, 52 insertions(+), 121 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index cee7448..9749c45 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1449,10 +1449,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence) if (likely(fence)) nouveau_fence_ref(fence); - spin_lock(&nvbo->bo.bdev->fence_lock); old_fence = nvbo->bo.sync_obj; nvbo->bo.sync_obj = fence; - spin_unlock(&nvbo->bo.bdev->fence_lock); nouveau_fence_unref(&old_fence); } @@ -1552,9 +1550,7 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma) if (vma->node) { if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) { ttm_bo_reserve(&nvbo->bo, false, false, false, 0); - spin_lock(&nvbo->bo.bdev->fence_lock); ttm_bo_wait(&nvbo->bo, false, false, false); - spin_unlock(&nvbo->bo.bdev->fence_lock); ttm_bo_unreserve(&nvbo->bo); nouveau_vm_unmap(vma); } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 6d8391d..eaa700a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -391,18 +391,11 @@ retry: static int validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo) { - struct nouveau_fence *fence = NULL; + struct nouveau_fence *fence = nvbo->bo.sync_obj; int ret = 0; - spin_lock(&nvbo->bo.bdev->fence_lock); - if (nvbo->bo.sync_obj) - fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); - - if (fence) { + if (fence) ret = nouveau_fence_sync(fence, chan); - nouveau_fence_unref(&fence); - } return ret; } @@ -614,9 +607,7 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev, data |= r->vor; } - spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, false, false, false); - spin_unlock(&nvbo->bo.bdev->fence_lock); if (ret) { NV_ERROR(drm, "reloc wait_idle failed: %d\n", ret); break; @@ -848,11 +839,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, ret = ttm_bo_reserve(&nvbo->bo, true, false, false, 0); if (!ret) { - spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, true, true, true); if (!no_wait && ret) fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); ttm_bo_unreserve(&nvbo->bo); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 808c444..df430e4 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -613,12 +613,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); if (unlikely(r != 0)) return r; - spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; if (bo->tbo.sync_obj) r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); ttm_bo_unreserve(&bo->tbo); return r; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index f6d7026..69fc29b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -498,28 +498,23 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; struct ttm_bo_global *glob = bo->glob; - struct ttm_bo_driver *driver; + struct ttm_bo_driver *driver = bdev->driver; void *sync_obj = NULL; int put_count; int ret; - spin_lock(&bdev->fence_lock); - (void) ttm_bo_wait(bo, false, false, true); - if (!bo->sync_obj) { - - spin_lock(&glob->lru_lock); - - /** - * Lock inversion between bo:reserve and bdev::fence_lock here, - * but that's OK, since we're only trylocking. - */ - - ret = ttm_bo_reserve_locked(bo, false, true, false, 0); + spin_lock(&glob->lru_lock); + ret = ttm_bo_reserve_locked(bo, false, true, false, 0); + if (!ret) { + ret = ttm_bo_wait(bo, false, false, true); - if (unlikely(ret == -EBUSY)) + if (unlikely(ret == -EBUSY)) { + sync_obj = driver->sync_obj_ref(bo->sync_obj); + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); goto queue; + } - spin_unlock(&bdev->fence_lock); put_count = ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); @@ -528,18 +523,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) ttm_bo_list_ref_sub(bo, put_count, true); return; - } else { - spin_lock(&glob->lru_lock); } queue: - driver = bdev->driver; - if (bo->sync_obj) - sync_obj = driver->sync_obj_ref(bo->sync_obj); - kref_get(&bo->list_kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); spin_unlock(&glob->lru_lock); - spin_unlock(&bdev->fence_lock); if (sync_obj) { driver->sync_obj_flush(sync_obj); @@ -565,25 +553,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool no_wait_gpu) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_driver *driver = bdev->driver; struct ttm_bo_global *glob = bo->glob; int put_count; int ret = 0; + void *sync_obj; retry: - spin_lock(&bdev->fence_lock); - ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bdev->fence_lock); - - if (unlikely(ret != 0)) - return ret; - spin_lock(&glob->lru_lock); - if (unlikely(list_empty(&bo->ddestroy))) { - spin_unlock(&glob->lru_lock); - return 0; - } - ret = ttm_bo_reserve_locked(bo, interruptible, no_wait_reserve, false, 0); @@ -592,18 +570,37 @@ retry: return ret; } - /** - * We can re-check for sync object without taking - * the bo::lock since setting the sync object requires - * also bo::reserved. A busy object at this point may - * be caused by another thread recently starting an accelerated - * eviction. - */ + if (unlikely(list_empty(&bo->ddestroy))) { + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); + spin_unlock(&glob->lru_lock); + return 0; + } + ret = ttm_bo_wait(bo, false, false, true); + + if (ret) { + if (no_wait_gpu) { + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); + spin_unlock(&glob->lru_lock); + return ret; + } - if (unlikely(bo->sync_obj)) { + /** + * Take a reference to the fence and unreserve, if the wait + * was succesful and no new sync_obj was attached, + * ttm_bo_wait in retry will return ret = 0, and end the loop. + */ + + sync_obj = driver->sync_obj_ref(&bo->sync_obj); atomic_set(&bo->reserved, 0); wake_up_all(&bo->event_queue); spin_unlock(&glob->lru_lock); + + ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible); + driver->sync_obj_unref(&sync_obj); + if (ret) + return ret; goto retry; } @@ -735,9 +732,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, struct ttm_placement placement; int ret = 0; - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { if (ret != -ERESTARTSYS) { @@ -1054,7 +1049,6 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, { int ret = 0; struct ttm_mem_reg mem; - struct ttm_bo_device *bdev = bo->bdev; BUG_ON(!ttm_bo_is_reserved(bo)); @@ -1063,9 +1057,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, * Have the driver move function wait for idle when necessary, * instead of doing it here. */ - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bdev->fence_lock); if (ret) return ret; mem.num_pages = bo->num_pages; @@ -1554,7 +1546,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, bdev->glob = glob; bdev->need_dma32 = need_dma32; bdev->val_seq = 0; - spin_lock_init(&bdev->fence_lock); mutex_lock(&glob->device_list_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&glob->device_list_mutex); @@ -1690,52 +1681,33 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy, bool interruptible, bool no_wait) { struct ttm_bo_driver *driver = bo->bdev->driver; - struct ttm_bo_device *bdev = bo->bdev; - void *sync_obj; int ret = 0; + WARN_ON_ONCE(!ttm_bo_is_reserved(bo)); + if (likely(bo->sync_obj == NULL)) return 0; - while (bo->sync_obj) { - + if (bo->sync_obj) { if (driver->sync_obj_signaled(bo->sync_obj)) { void *tmp_obj = bo->sync_obj; bo->sync_obj = NULL; clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bdev->fence_lock); driver->sync_obj_unref(&tmp_obj); - spin_lock(&bdev->fence_lock); - continue; + return 0; } if (no_wait) return -EBUSY; - sync_obj = driver->sync_obj_ref(bo->sync_obj); - spin_unlock(&bdev->fence_lock); - ret = driver->sync_obj_wait(sync_obj, + ret = driver->sync_obj_wait(bo->sync_obj, lazy, interruptible); if (unlikely(ret != 0)) { - driver->sync_obj_unref(&sync_obj); - spin_lock(&bdev->fence_lock); return ret; } - spin_lock(&bdev->fence_lock); - if (likely(bo->sync_obj == sync_obj)) { - void *tmp_obj = bo->sync_obj; - bo->sync_obj = NULL; - clear_bit(TTM_BO_PRIV_FLAG_MOVING, - &bo->priv_flags); - spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&sync_obj); - driver->sync_obj_unref(&tmp_obj); - spin_lock(&bdev->fence_lock); - } else { - spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&sync_obj); - spin_lock(&bdev->fence_lock); - } + driver->sync_obj_unref(&bo->sync_obj); + clear_bit(TTM_BO_PRIV_FLAG_MOVING, + &bo->priv_flags); } return 0; } @@ -1799,9 +1771,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) * Wait for GPU, then move to system cached. */ - spin_lock(&bo->bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bo->bdev->fence_lock); if (unlikely(ret != 0)) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index d80d1e8..84d6e01 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -622,7 +622,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_buffer_object *ghost_obj; void *tmp_obj = NULL; - spin_lock(&bdev->fence_lock); if (bo->sync_obj) { tmp_obj = bo->sync_obj; bo->sync_obj = NULL; @@ -630,7 +629,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, bo->sync_obj = driver->sync_obj_ref(sync_obj); if (evict) { ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); if (ret) @@ -653,7 +651,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, */ set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index a877813..55718c2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -122,17 +122,14 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * move. */ - spin_lock(&bdev->fence_lock); if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { ret = ttm_bo_wait(bo, false, true, false); - spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { retval = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; goto out_unlock; } - } else - spin_unlock(&bdev->fence_lock); + } ret = ttm_mem_io_lock(man, true); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 721886e..51b5e97 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -207,7 +207,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) driver = bdev->driver; glob = bo->glob; - spin_lock(&bdev->fence_lock); spin_lock(&glob->lru_lock); list_for_each_entry(entry, list, head) { @@ -218,7 +217,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) entry->reserved = false; } spin_unlock(&glob->lru_lock); - spin_unlock(&bdev->fence_lock); list_for_each_entry(entry, list, head) { if (entry->old_sync_obj) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index ed3c1e7..e038c9a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -251,13 +251,10 @@ static void vmw_dummy_query_bo_prepare(struct vmw_private *dev_priv) volatile SVGA3dQueryResult *result; bool dummy; int ret; - struct ttm_bo_device *bdev = &dev_priv->bdev; struct ttm_buffer_object *bo = dev_priv->dummy_query_bo; ttm_bo_reserve(bo, false, false, false, 0); - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) (void) vmw_fallback_wait(dev_priv, false, true, 0, false, 10*HZ); diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 816d9b1..6c69224 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -222,6 +222,8 @@ struct ttm_buffer_object { struct file *persistent_swap_storage; struct ttm_tt *ttm; bool evicted; + void *sync_obj; + unsigned long priv_flags; /** * Members protected by the bdev::lru_lock. @@ -242,16 +244,6 @@ struct ttm_buffer_object { atomic_t reserved; /** - * Members protected by struct buffer_object_device::fence_lock - * In addition, setting sync_obj to anything else - * than NULL requires bo::reserved to be held. This allows for - * checking NULL while reserved but not holding the mentioned lock. - */ - - void *sync_obj; - unsigned long priv_flags; - - /** * Members protected by the bdev::vm_lock */ diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 0c8c3b5..aac101b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -515,8 +515,6 @@ struct ttm_bo_global { * * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. - * @fence_lock: Protects the synchronizing members on *all* bos belonging - * to this device. * @addr_space_mm: Range manager for the device address space. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. @@ -539,7 +537,6 @@ struct ttm_bo_device { struct ttm_bo_driver *driver; rwlock_t vm_lock; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; - spinlock_t fence_lock; /* * Protected by the vm lock. */ _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel