With the nouveau calls fixed there were only 2 places left that used
fence_lock without a reservation, those are fixed in this patch:
ttm_bo_cleanup_refs_or_queue is fixed by simply doing things the other
way around.
ttm_bo_cleanup_refs is fixed by taking a reservation first, then a
pointer
to the fence object and backs off from the reservation if waiting has to
be performed.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxx>
---
This patch should be applied only after all the previous patches I
sent are applied,
since it depends on sync_obj_arg removal (kinda, probably fails to
apply otherwise),
uses ttm_bo_is_reserved, and depends on nouveau wait behavior being
fixed.
drivers/gpu/drm/nouveau/nouveau_bo.c | 4 -
drivers/gpu/drm/nouveau/nouveau_gem.c | 15 +---
drivers/gpu/drm/radeon/radeon_object.c | 2 -
drivers/gpu/drm/ttm/ttm_bo.c | 124
++++++++++++--------------------
drivers/gpu/drm/ttm/ttm_bo_util.c | 3 -
drivers/gpu/drm/ttm/ttm_bo_vm.c | 5 -
drivers/gpu/drm/ttm/ttm_execbuf_util.c | 2 -
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 3 -
include/drm/ttm/ttm_bo_api.h | 12 +--
include/drm/ttm/ttm_bo_driver.h | 3 -
10 files changed, 52 insertions(+), 121 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index cee7448..9749c45 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1449,10 +1449,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo,
struct nouveau_fence *fence)
if (likely(fence))
nouveau_fence_ref(fence);
- spin_lock(&nvbo->bo.bdev->fence_lock);
old_fence = nvbo->bo.sync_obj;
nvbo->bo.sync_obj = fence;
- spin_unlock(&nvbo->bo.bdev->fence_lock);
nouveau_fence_unref(&old_fence);
}
@@ -1552,9 +1550,7 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo,
struct nouveau_vma *vma)
if (vma->node) {
if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
ttm_bo_reserve(&nvbo->bo, false, false, false, 0);
- spin_lock(&nvbo->bo.bdev->fence_lock);
ttm_bo_wait(&nvbo->bo, false, false, false);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
ttm_bo_unreserve(&nvbo->bo);
nouveau_vm_unmap(vma);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 6d8391d..eaa700a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -391,18 +391,11 @@ retry:
static int
validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo)
{
- struct nouveau_fence *fence = NULL;
+ struct nouveau_fence *fence = nvbo->bo.sync_obj;
int ret = 0;
- spin_lock(&nvbo->bo.bdev->fence_lock);
- if (nvbo->bo.sync_obj)
- fence = nouveau_fence_ref(nvbo->bo.sync_obj);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
-
- if (fence) {
+ if (fence)
ret = nouveau_fence_sync(fence, chan);
- nouveau_fence_unref(&fence);
- }
return ret;
}
@@ -614,9 +607,7 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device
*dev,
data |= r->vor;
}
- spin_lock(&nvbo->bo.bdev->fence_lock);
ret = ttm_bo_wait(&nvbo->bo, false, false, false);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
if (ret) {
NV_ERROR(drm, "reloc wait_idle failed: %d\n", ret);
break;
@@ -848,11 +839,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device
*dev, void *data,
ret = ttm_bo_reserve(&nvbo->bo, true, false, false, 0);
if (!ret) {
- spin_lock(&nvbo->bo.bdev->fence_lock);
ret = ttm_bo_wait(&nvbo->bo, true, true, true);
if (!no_wait && ret)
fence = nouveau_fence_ref(nvbo->bo.sync_obj);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
ttm_bo_unreserve(&nvbo->bo);
}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c
b/drivers/gpu/drm/radeon/radeon_object.c
index 808c444..df430e4 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -613,12 +613,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32
*mem_type, bool no_wait)
r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
if (unlikely(r != 0))
return r;
- spin_lock(&bo->tbo.bdev->fence_lock);
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
if (bo->tbo.sync_obj)
r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
- spin_unlock(&bo->tbo.bdev->fence_lock);
ttm_bo_unreserve(&bo->tbo);
return r;
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f6d7026..69fc29b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -498,28 +498,23 @@ static void ttm_bo_cleanup_refs_or_queue(struct
ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_global *glob = bo->glob;
- struct ttm_bo_driver *driver;
+ struct ttm_bo_driver *driver = bdev->driver;
void *sync_obj = NULL;
int put_count;
int ret;
- spin_lock(&bdev->fence_lock);
- (void) ttm_bo_wait(bo, false, false, true);
- if (!bo->sync_obj) {
-
- spin_lock(&glob->lru_lock);
-
- /**
- * Lock inversion between bo:reserve and bdev::fence_lock here,
- * but that's OK, since we're only trylocking.
- */
-
- ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+ spin_lock(&glob->lru_lock);
+ ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+ if (!ret) {
+ ret = ttm_bo_wait(bo, false, false, true);
- if (unlikely(ret == -EBUSY))
+ if (unlikely(ret == -EBUSY)) {
+ sync_obj = driver->sync_obj_ref(bo->sync_obj);
+ atomic_set(&bo->reserved, 0);
+ wake_up_all(&bo->event_queue);
goto queue;
+ }
- spin_unlock(&bdev->fence_lock);
put_count = ttm_bo_del_from_lru(bo);
spin_unlock(&glob->lru_lock);
@@ -528,18 +523,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct
ttm_buffer_object *bo)
ttm_bo_list_ref_sub(bo, put_count, true);
return;
- } else {
- spin_lock(&glob->lru_lock);
}
queue:
- driver = bdev->driver;
- if (bo->sync_obj)
- sync_obj = driver->sync_obj_ref(bo->sync_obj);
-
kref_get(&bo->list_kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&glob->lru_lock);
- spin_unlock(&bdev->fence_lock);
if (sync_obj) {
driver->sync_obj_flush(sync_obj);
@@ -565,25 +553,15 @@ static int ttm_bo_cleanup_refs(struct
ttm_buffer_object *bo,
bool no_wait_gpu)
{
struct ttm_bo_device *bdev = bo->bdev;
+ struct ttm_bo_driver *driver = bdev->driver;
struct ttm_bo_global *glob = bo->glob;
int put_count;
int ret = 0;
+ void *sync_obj;
retry:
- spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
- spin_unlock(&bdev->fence_lock);
-
- if (unlikely(ret != 0))
- return ret;
-
spin_lock(&glob->lru_lock);
- if (unlikely(list_empty(&bo->ddestroy))) {
- spin_unlock(&glob->lru_lock);
- return 0;
- }
-
ret = ttm_bo_reserve_locked(bo, interruptible,
no_wait_reserve, false, 0);
@@ -592,18 +570,37 @@ retry:
return ret;
}
- /**
- * We can re-check for sync object without taking
- * the bo::lock since setting the sync object requires
- * also bo::reserved. A busy object at this point may
- * be caused by another thread recently starting an accelerated
- * eviction.
- */
+ if (unlikely(list_empty(&bo->ddestroy))) {
+ atomic_set(&bo->reserved, 0);
+ wake_up_all(&bo->event_queue);
+ spin_unlock(&glob->lru_lock);
+ return 0;
+ }
+ ret = ttm_bo_wait(bo, false, false, true);
+
+ if (ret) {
+ if (no_wait_gpu) {
+ atomic_set(&bo->reserved, 0);
+ wake_up_all(&bo->event_queue);
+ spin_unlock(&glob->lru_lock);
+ return ret;
+ }
- if (unlikely(bo->sync_obj)) {
+ /**
+ * Take a reference to the fence and unreserve, if the wait
+ * was succesful and no new sync_obj was attached,
+ * ttm_bo_wait in retry will return ret = 0, and end the loop.
+ */
+
+ sync_obj = driver->sync_obj_ref(&bo->sync_obj);
atomic_set(&bo->reserved, 0);
wake_up_all(&bo->event_queue);
spin_unlock(&glob->lru_lock);
+
+ ret = driver->sync_obj_wait(bo->sync_obj, false,
interruptible);
+ driver->sync_obj_unref(&sync_obj);
+ if (ret)
+ return ret;
goto retry;
}
@@ -735,9 +732,7 @@ static int ttm_bo_evict(struct
ttm_buffer_object *bo, bool interruptible,
struct ttm_placement placement;
int ret = 0;
- spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
- spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
if (ret != -ERESTARTSYS) {
@@ -1054,7 +1049,6 @@ int ttm_bo_move_buffer(struct ttm_buffer_object
*bo,
{
int ret = 0;
struct ttm_mem_reg mem;
- struct ttm_bo_device *bdev = bo->bdev;
BUG_ON(!ttm_bo_is_reserved(bo));
@@ -1063,9 +1057,7 @@ int ttm_bo_move_buffer(struct
ttm_buffer_object *bo,
* Have the driver move function wait for idle when necessary,
* instead of doing it here.
*/
- spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
- spin_unlock(&bdev->fence_lock);
if (ret)
return ret;
mem.num_pages = bo->num_pages;
@@ -1554,7 +1546,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
bdev->glob = glob;
bdev->need_dma32 = need_dma32;
bdev->val_seq = 0;
- spin_lock_init(&bdev->fence_lock);
mutex_lock(&glob->device_list_mutex);
list_add_tail(&bdev->device_list, &glob->device_list);
mutex_unlock(&glob->device_list_mutex);
@@ -1690,52 +1681,33 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
bool lazy, bool interruptible, bool no_wait)
{
struct ttm_bo_driver *driver = bo->bdev->driver;
- struct ttm_bo_device *bdev = bo->bdev;
- void *sync_obj;
int ret = 0;
+ WARN_ON_ONCE(!ttm_bo_is_reserved(bo));
+
if (likely(bo->sync_obj == NULL))
return 0;
- while (bo->sync_obj) {
-
+ if (bo->sync_obj) {
if (driver->sync_obj_signaled(bo->sync_obj)) {
void *tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
- continue;
+ return 0;
}
if (no_wait)
return -EBUSY;
- sync_obj = driver->sync_obj_ref(bo->sync_obj);
- spin_unlock(&bdev->fence_lock);
- ret = driver->sync_obj_wait(sync_obj,
+ ret = driver->sync_obj_wait(bo->sync_obj,
lazy, interruptible);
if (unlikely(ret != 0)) {
- driver->sync_obj_unref(&sync_obj);
- spin_lock(&bdev->fence_lock);
return ret;
}
- spin_lock(&bdev->fence_lock);
- if (likely(bo->sync_obj == sync_obj)) {
- void *tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- clear_bit(TTM_BO_PRIV_FLAG_MOVING,
- &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&sync_obj);
- driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
- } else {
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&sync_obj);
- spin_lock(&bdev->fence_lock);
- }
+ driver->sync_obj_unref(&bo->sync_obj);
+ clear_bit(TTM_BO_PRIV_FLAG_MOVING,
+ &bo->priv_flags);
}
return 0;
}
@@ -1799,9 +1771,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink
*shrink)
* Wait for GPU, then move to system cached.
*/
- spin_lock(&bo->bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, false);
- spin_unlock(&bo->bdev->fence_lock);
if (unlikely(ret != 0))
goto out;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d80d1e8..84d6e01 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -622,7 +622,6 @@ int ttm_bo_move_accel_cleanup(struct
ttm_buffer_object *bo,
struct ttm_buffer_object *ghost_obj;
void *tmp_obj = NULL;
- spin_lock(&bdev->fence_lock);
if (bo->sync_obj) {
tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
@@ -630,7 +629,6 @@ int ttm_bo_move_accel_cleanup(struct
ttm_buffer_object *bo,
bo->sync_obj = driver->sync_obj_ref(sync_obj);
if (evict) {
ret = ttm_bo_wait(bo, false, false, false);
- spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
if (ret)
@@ -653,7 +651,6 @@ int ttm_bo_move_accel_cleanup(struct
ttm_buffer_object *bo,
*/
set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a877813..55718c2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -122,17 +122,14 @@ static int ttm_bo_vm_fault(struct
vm_area_struct *vma, struct vm_fault *vmf)
* move.
*/
- spin_lock(&bdev->fence_lock);
if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
ret = ttm_bo_wait(bo, false, true, false);
- spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
retval = (ret != -ERESTARTSYS) ?
VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
goto out_unlock;
}
- } else
- spin_unlock(&bdev->fence_lock);
+ }
ret = ttm_mem_io_lock(man, true);
if (unlikely(ret != 0)) {
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 721886e..51b5e97 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -207,7 +207,6 @@ void ttm_eu_fence_buffer_objects(struct list_head
*list, void *sync_obj)
driver = bdev->driver;
glob = bo->glob;
- spin_lock(&bdev->fence_lock);
spin_lock(&glob->lru_lock);
list_for_each_entry(entry, list, head) {
@@ -218,7 +217,6 @@ void ttm_eu_fence_buffer_objects(struct list_head
*list, void *sync_obj)
entry->reserved = false;
}
spin_unlock(&glob->lru_lock);
- spin_unlock(&bdev->fence_lock);
list_for_each_entry(entry, list, head) {
if (entry->old_sync_obj)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index ed3c1e7..e038c9a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -251,13 +251,10 @@ static void vmw_dummy_query_bo_prepare(struct
vmw_private *dev_priv)
volatile SVGA3dQueryResult *result;
bool dummy;
int ret;
- struct ttm_bo_device *bdev = &dev_priv->bdev;
struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
ttm_bo_reserve(bo, false, false, false, 0);
- spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, false);
- spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0))
(void) vmw_fallback_wait(dev_priv, false, true, 0, false,
10*HZ);
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 816d9b1..6c69224 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -222,6 +222,8 @@ struct ttm_buffer_object {
struct file *persistent_swap_storage;
struct ttm_tt *ttm;
bool evicted;
+ void *sync_obj;
+ unsigned long priv_flags;
/**
* Members protected by the bdev::lru_lock.
@@ -242,16 +244,6 @@ struct ttm_buffer_object {
atomic_t reserved;
/**
- * Members protected by struct buffer_object_device::fence_lock
- * In addition, setting sync_obj to anything else
- * than NULL requires bo::reserved to be held. This allows for
- * checking NULL while reserved but not holding the mentioned lock.
- */
-
- void *sync_obj;
- unsigned long priv_flags;
-
- /**
* Members protected by the bdev::vm_lock
*/
diff --git a/include/drm/ttm/ttm_bo_driver.h
b/include/drm/ttm/ttm_bo_driver.h
index 0c8c3b5..aac101b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -515,8 +515,6 @@ struct ttm_bo_global {
*
* @driver: Pointer to a struct ttm_bo_driver struct setup by the
driver.
* @man: An array of mem_type_managers.
- * @fence_lock: Protects the synchronizing members on *all* bos
belonging
- * to this device.
* @addr_space_mm: Range manager for the device address space.
* lru_lock: Spinlock that protects the buffer+device lru lists and
* ddestroy lists.
@@ -539,7 +537,6 @@ struct ttm_bo_device {
struct ttm_bo_driver *driver;
rwlock_t vm_lock;
struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
- spinlock_t fence_lock;
/*
* Protected by the vm lock.
*/
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel