On Sun, Aug 7, 2011 at 4:39 PM, Marek Olšák <maraeo@xxxxxxxxx> wrote: > Sometimes we want to know whether a buffer is busy and wait for it (bo_wait). > However, sometimes it would be more useful to be able to query whether > a buffer is busy and being either read or written, and wait until it's stopped > being either read or written. The point of this is to be able to avoid > unnecessary waiting, e.g. if a GPU has written something to a buffer and is now > reading that buffer, and a CPU wants to map that buffer for read, it needs to > only wait for the last write. If there were no write, there wouldn't be any > waiting needed. > > This, or course, requires user space drivers to send read/write flags > with each relocation (like we have read/write domains in radeon, so we can > actually use those for something useful now). > > Now how this patch works: > > The read/write flags should passed to ttm_validate_buffer. TTM maintains > separate sync objects of the last read and write for each buffer, in addition > to the sync object of the last use of a buffer. ttm_bo_wait then operates > with one the sync objects. Just minor comment for extra safety see below, otherwise: Reviewed-by: Jerome Glisse <jglisse@xxxxxxxxxx> > Signed-off-by: Marek Olšák <maraeo@xxxxxxxxx> > --- > drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +- > drivers/gpu/drm/nouveau/nouveau_gem.c | 5 +- > drivers/gpu/drm/radeon/radeon_cs.c | 1 + > drivers/gpu/drm/radeon/radeon_object.h | 2 +- > drivers/gpu/drm/ttm/ttm_bo.c | 97 ++++++++++++++++++++++-------- > drivers/gpu/drm/ttm/ttm_bo_util.c | 26 +++++++-- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 +- > drivers/gpu/drm/ttm/ttm_execbuf_util.c | 17 +++++- > drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 1 + > include/drm/ttm/ttm_bo_api.h | 16 +++++- > include/drm/ttm/ttm_execbuf_util.h | 6 ++ > 11 files changed, 137 insertions(+), 39 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c > index 890d50e..e87e24b 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c > @@ -1104,7 +1104,8 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma) > if (vma->node) { > if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) { > spin_lock(&nvbo->bo.bdev->fence_lock); > - ttm_bo_wait(&nvbo->bo, false, false, false); > + ttm_bo_wait(&nvbo->bo, false, false, false, > + TTM_USAGE_READWRITE); > spin_unlock(&nvbo->bo.bdev->fence_lock); > nouveau_vm_unmap(vma); > } > diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c > index 5f0bc57..322bf62 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_gem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c > @@ -589,7 +589,8 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev, > } > > spin_lock(&nvbo->bo.bdev->fence_lock); > - ret = ttm_bo_wait(&nvbo->bo, false, false, false); > + ret = ttm_bo_wait(&nvbo->bo, false, false, false, > + TTM_USAGE_READWRITE); > spin_unlock(&nvbo->bo.bdev->fence_lock); > if (ret) { > NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret); > @@ -825,7 +826,7 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, > nvbo = nouveau_gem_object(gem); > > spin_lock(&nvbo->bo.bdev->fence_lock); > - ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait); > + ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait, TTM_USAGE_READWRITE); > spin_unlock(&nvbo->bo.bdev->fence_lock); > drm_gem_object_unreference_unlocked(gem); > return ret; > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index fae00c0..14e8531 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -80,6 +80,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) > p->relocs[i].lobj.wdomain = r->write_domain; > p->relocs[i].lobj.rdomain = r->read_domains; > p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; > + p->relocs[i].lobj.tv.usage = TTM_USAGE_READWRITE; > p->relocs[i].handle = r->handle; > p->relocs[i].flags = r->flags; > radeon_bo_list_add_object(&p->relocs[i].lobj, > diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h > index ede6c13..e9dc8b2 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.h > +++ b/drivers/gpu/drm/radeon/radeon_object.h > @@ -130,7 +130,7 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, > if (mem_type) > *mem_type = bo->tbo.mem.mem_type; > if (bo->tbo.sync_obj) > - r = ttm_bo_wait(&bo->tbo, true, true, no_wait); > + r = ttm_bo_wait(&bo->tbo, true, true, no_wait, TTM_USAGE_READWRITE); > spin_unlock(&bo->tbo.bdev->fence_lock); > ttm_bo_unreserve(&bo->tbo); > return r; > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > index 56619f6..bb8c86d 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -494,7 +494,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) > int ret; > > spin_lock(&bdev->fence_lock); > - (void) ttm_bo_wait(bo, false, false, true); > + (void) ttm_bo_wait(bo, false, false, true, TTM_USAGE_READWRITE); > if (!bo->sync_obj) { > > spin_lock(&glob->lru_lock); > @@ -562,7 +562,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > > retry: > spin_lock(&bdev->fence_lock); > - ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); > + ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu, > + TTM_USAGE_READWRITE); > spin_unlock(&bdev->fence_lock); > > if (unlikely(ret != 0)) > @@ -721,7 +722,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, > int ret = 0; > > spin_lock(&bdev->fence_lock); > - ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); > + ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu, > + TTM_USAGE_READWRITE); > spin_unlock(&bdev->fence_lock); > > if (unlikely(ret != 0)) { > @@ -1068,7 +1070,8 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, > * instead of doing it here. > */ > spin_lock(&bdev->fence_lock); > - ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); > + ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu, > + TTM_USAGE_READWRITE); > spin_unlock(&bdev->fence_lock); > if (ret) > return ret; > @@ -1688,34 +1691,83 @@ out_unlock: > return ret; > } > > +static void ttm_bo_unref_sync_obj_locked(struct ttm_buffer_object *bo, > + void *sync_obj, > + void **extra_sync_obj) > +{ > + struct ttm_bo_device *bdev = bo->bdev; > + struct ttm_bo_driver *driver = bdev->driver; > + void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL; > + > + /* We must unref the sync obj wherever it's ref'd. > + * Note that if we unref bo->sync_obj, we can unref both the read > + * and write sync objs too, because they can't be newer than > + * bo->sync_obj, so they are no longer relevant. */ > + if (sync_obj == bo->sync_obj || > + sync_obj == bo->sync_obj_read) { > + tmp_obj_read = bo->sync_obj_read; > + bo->sync_obj_read = NULL; > + } > + if (sync_obj == bo->sync_obj || > + sync_obj == bo->sync_obj_write) { > + tmp_obj_write = bo->sync_obj_write; > + bo->sync_obj_write = NULL; > + } > + if (sync_obj == bo->sync_obj) { > + tmp_obj = bo->sync_obj; > + bo->sync_obj = NULL; > + } > + > + clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); > + spin_unlock(&bdev->fence_lock); > + if (tmp_obj) > + driver->sync_obj_unref(&tmp_obj); > + if (tmp_obj_read) > + driver->sync_obj_unref(&tmp_obj_read); > + if (tmp_obj_write) > + driver->sync_obj_unref(&tmp_obj_write); > + if (extra_sync_obj) > + driver->sync_obj_unref(extra_sync_obj); > + spin_lock(&bdev->fence_lock); > +} > + > int ttm_bo_wait(struct ttm_buffer_object *bo, > - bool lazy, bool interruptible, bool no_wait) > + bool lazy, bool interruptible, bool no_wait, > + enum ttm_buffer_usage usage) > { > struct ttm_bo_driver *driver = bo->bdev->driver; > struct ttm_bo_device *bdev = bo->bdev; > void *sync_obj; > void *sync_obj_arg; > int ret = 0; > + void **bo_sync_obj; > > - if (likely(bo->sync_obj == NULL)) > + switch (usage) { > + case TTM_USAGE_READ: > + bo_sync_obj = &bo->sync_obj_read; > + break; > + case TTM_USAGE_WRITE: > + bo_sync_obj = &bo->sync_obj_write; > + break; > + case TTM_USAGE_READWRITE: > + default: > + bo_sync_obj = &bo->sync_obj; > + } > + > + if (likely(*bo_sync_obj == NULL)) > return 0; > > - while (bo->sync_obj) { > + while (*bo_sync_obj) { > > - if (driver->sync_obj_signaled(bo->sync_obj, bo->sync_obj_arg)) { > - void *tmp_obj = bo->sync_obj; > - bo->sync_obj = NULL; > - clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); > - spin_unlock(&bdev->fence_lock); > - driver->sync_obj_unref(&tmp_obj); > - spin_lock(&bdev->fence_lock); > + if (driver->sync_obj_signaled(*bo_sync_obj, bo->sync_obj_arg)) { > + ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, NULL); > continue; > } > > if (no_wait) > return -EBUSY; > > - sync_obj = driver->sync_obj_ref(bo->sync_obj); > + sync_obj = driver->sync_obj_ref(*bo_sync_obj); > sync_obj_arg = bo->sync_obj_arg; > spin_unlock(&bdev->fence_lock); > ret = driver->sync_obj_wait(sync_obj, sync_obj_arg, > @@ -1726,16 +1778,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, > return ret; > } > spin_lock(&bdev->fence_lock); > - if (likely(bo->sync_obj == sync_obj && > + if (likely(*bo_sync_obj == sync_obj && > bo->sync_obj_arg == sync_obj_arg)) { > - void *tmp_obj = bo->sync_obj; > - bo->sync_obj = NULL; > - clear_bit(TTM_BO_PRIV_FLAG_MOVING, > - &bo->priv_flags); > - spin_unlock(&bdev->fence_lock); > - driver->sync_obj_unref(&sync_obj); > - driver->sync_obj_unref(&tmp_obj); > - spin_lock(&bdev->fence_lock); > + ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, &sync_obj); > } else { > spin_unlock(&bdev->fence_lock); > driver->sync_obj_unref(&sync_obj); > @@ -1759,7 +1804,7 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) > if (unlikely(ret != 0)) > return ret; > spin_lock(&bdev->fence_lock); > - ret = ttm_bo_wait(bo, false, true, no_wait); > + ret = ttm_bo_wait(bo, false, true, no_wait, TTM_USAGE_READWRITE); > spin_unlock(&bdev->fence_lock); > if (likely(ret == 0)) > atomic_inc(&bo->cpu_writers); > @@ -1833,7 +1878,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) > */ > > spin_lock(&bo->bdev->fence_lock); > - ret = ttm_bo_wait(bo, false, false, false); > + ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE); > spin_unlock(&bo->bdev->fence_lock); > > if (unlikely(ret != 0)) > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c > index 77dbf40..0399573 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > @@ -436,6 +436,8 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, > atomic_set(&fbo->cpu_writers, 0); > > fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj); > + fbo->sync_obj_read = driver->sync_obj_ref(bo->sync_obj_read); > + fbo->sync_obj_write = driver->sync_obj_ref(bo->sync_obj_write); > kref_init(&fbo->list_kref); > kref_init(&fbo->kref); > fbo->destroy = &ttm_transfered_destroy; > @@ -618,20 +620,30 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, > struct ttm_mem_reg *old_mem = &bo->mem; > int ret; > struct ttm_buffer_object *ghost_obj; > - void *tmp_obj = NULL; > + void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL; > > spin_lock(&bdev->fence_lock); > - if (bo->sync_obj) { > + if (bo->sync_obj) > tmp_obj = bo->sync_obj; > - bo->sync_obj = NULL; > - } > + if (bo->sync_obj_read) > + tmp_obj_read = bo->sync_obj_read; > + if (bo->sync_obj_write) > + tmp_obj_write = bo->sync_obj_write; > + > bo->sync_obj = driver->sync_obj_ref(sync_obj); > + bo->sync_obj_read = driver->sync_obj_ref(sync_obj); > + bo->sync_obj_write = driver->sync_obj_ref(sync_obj); > bo->sync_obj_arg = sync_obj_arg; > if (evict) { > - ret = ttm_bo_wait(bo, false, false, false); > + ret = ttm_bo_wait(bo, false, false, false, > + TTM_USAGE_READWRITE); > spin_unlock(&bdev->fence_lock); > if (tmp_obj) > driver->sync_obj_unref(&tmp_obj); > + if (tmp_obj_read) > + driver->sync_obj_unref(&tmp_obj_read); > + if (tmp_obj_write) > + driver->sync_obj_unref(&tmp_obj_write); > if (ret) > return ret; > > @@ -655,6 +667,10 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, > spin_unlock(&bdev->fence_lock); > if (tmp_obj) > driver->sync_obj_unref(&tmp_obj); > + if (tmp_obj_read) > + driver->sync_obj_unref(&tmp_obj_read); > + if (tmp_obj_write) > + driver->sync_obj_unref(&tmp_obj_write); > > ret = ttm_buffer_object_transfer(bo, &ghost_obj); > if (ret) > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index 221b924..ff1e26f 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -122,7 +122,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > > spin_lock(&bdev->fence_lock); > if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { > - ret = ttm_bo_wait(bo, false, true, false); > + ret = ttm_bo_wait(bo, false, true, false, TTM_USAGE_READWRITE); > spin_unlock(&bdev->fence_lock); > if (unlikely(ret != 0)) { > retval = (ret != -ERESTARTSYS) ? > diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > index 3832fe1..0438296 100644 > --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c > +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > @@ -223,6 +223,14 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) > bo = entry->bo; > entry->old_sync_obj = bo->sync_obj; Here we should set entry->old_sync_obj_read & entry->old_sync_obj_write to NULL so we don't get bite by uninitialized memory (if caller fail or forget to do so) > bo->sync_obj = driver->sync_obj_ref(sync_obj); > + if (entry->usage & TTM_USAGE_READ) { > + entry->old_sync_obj_read = bo->sync_obj_read; > + bo->sync_obj_read = driver->sync_obj_ref(sync_obj); > + } > + if (entry->usage & TTM_USAGE_WRITE) { > + entry->old_sync_obj_write = bo->sync_obj_write; > + bo->sync_obj_write = driver->sync_obj_ref(sync_obj); > + } > bo->sync_obj_arg = entry->new_sync_obj_arg; > ttm_bo_unreserve_locked(bo); > entry->reserved = false; > @@ -231,8 +239,15 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) > spin_unlock(&bdev->fence_lock); > > list_for_each_entry(entry, list, head) { > - if (entry->old_sync_obj) > + if (entry->old_sync_obj) { > driver->sync_obj_unref(&entry->old_sync_obj); > + } > + if (entry->old_sync_obj_read) { > + driver->sync_obj_unref(&entry->old_sync_obj_read); > + } > + if (entry->old_sync_obj_write) { > + driver->sync_obj_unref(&entry->old_sync_obj_write); > + } > } > } > EXPORT_SYMBOL(ttm_eu_fence_buffer_objects); > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c > index 41b95ed..8ca3ddb 100644 > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c > @@ -224,6 +224,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, > if (unlikely(cur_validate_node == sw_context->cur_val_buf)) { > val_buf = &sw_context->val_bufs[cur_validate_node]; > val_buf->bo = ttm_bo_reference(bo); > + val_buf->usage = TTM_USAGE_READWRITE; > val_buf->new_sync_obj_arg = (void *) dev_priv; > list_add_tail(&val_buf->head, &sw_context->validate_nodes); > ++sw_context->cur_val_buf; > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index 42e3469..da957bf 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -44,6 +44,11 @@ struct ttm_bo_device; > > struct drm_mm_node; > > +enum ttm_buffer_usage { > + TTM_USAGE_READ = 1, > + TTM_USAGE_WRITE = 2, > + TTM_USAGE_READWRITE = TTM_USAGE_READ | TTM_USAGE_WRITE > +}; > > /** > * struct ttm_placement > @@ -174,7 +179,10 @@ struct ttm_tt; > * the bo_device::lru_lock. > * @reserved: Deadlock-free lock used for synchronization state transitions. > * @sync_obj_arg: Opaque argument to synchronization object function. > - * @sync_obj: Pointer to a synchronization object. > + * @sync_obj: Pointer to a synchronization object of a last read or write, > + * whichever is later. > + * @sync_obj_read: Pointer to a synchronization object of a last read. > + * @sync_obj_write: Pointer to a synchronization object of a last write. > * @priv_flags: Flags describing buffer object internal state. > * @vm_rb: Rb node for the vm rb tree. > * @vm_node: Address space manager node. > @@ -258,6 +266,8 @@ struct ttm_buffer_object { > > void *sync_obj_arg; > void *sync_obj; > + void *sync_obj_read; > + void *sync_obj_write; > unsigned long priv_flags; > > /** > @@ -325,6 +335,7 @@ ttm_bo_reference(struct ttm_buffer_object *bo) > * @bo: The buffer object. > * @interruptible: Use interruptible wait. > * @no_wait: Return immediately if buffer is busy. > + * @usage: Whether to wait for the last read and/or the last write. > * > * This function must be called with the bo::mutex held, and makes > * sure any previous rendering to the buffer is completed. > @@ -334,7 +345,8 @@ ttm_bo_reference(struct ttm_buffer_object *bo) > * Returns -ERESTARTSYS if interrupted by a signal. > */ > extern int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy, > - bool interruptible, bool no_wait); > + bool interruptible, bool no_wait, > + enum ttm_buffer_usage usage); > /** > * ttm_bo_validate > * > diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h > index 26cc7f9..375f299 100644 > --- a/include/drm/ttm/ttm_execbuf_util.h > +++ b/include/drm/ttm/ttm_execbuf_util.h > @@ -41,20 +41,26 @@ > * @bo: refcounted buffer object pointer. > * @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once > * adding a new sync object. > + * @usage Indicates how @bo is used by the device. > * @reserved: Indicates whether @bo has been reserved for validation. > * @removed: Indicates whether @bo has been removed from lru lists. > * @put_count: Number of outstanding references on bo::list_kref. > * @old_sync_obj: Pointer to a sync object about to be unreferenced > + * @old_sync_obj_read: Pointer to a read sync object about to be unreferenced. > + * @old_sync_obj_write: Pointer to a write sync object about to be unreferenced. > */ > > struct ttm_validate_buffer { > struct list_head head; > struct ttm_buffer_object *bo; > void *new_sync_obj_arg; > + enum ttm_buffer_usage usage; > bool reserved; > bool removed; > int put_count; > void *old_sync_obj; > + void *old_sync_obj_read; > + void *old_sync_obj_write; > }; > > /** > -- > 1.7.4.1 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel