Userspace currently busy-waits for fences to complete; on my workload, this
busy-wait consumes 10% of the available CPU time.

Provide an ioctl so that userspace can instead sleep until the EOP interrupt
that corresponds to a previous EVENT_WRITE_EOP packet arrives.

This currently doesn't work, hence the debug code piled in.

Signed-off-by: Simon Farnsworth <simon.farnsworth@xxxxxxxxxxxx>
---
 drivers/gpu/drm/radeon/evergreen.c     |    8 ++--
 drivers/gpu/drm/radeon/radeon.h        |    3 +
 drivers/gpu/drm/radeon/radeon_device.c |    1 +
 drivers/gpu/drm/radeon/radeon_fence.c  |    3 +
 drivers/gpu/drm/radeon/radeon_gem.c    |   70 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c    |    1 +
 include/drm/radeon_drm.h               |   28 +++++++++++++
 7 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 0c5dd78..5b886b0 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3083,11 +3083,11 @@ restart_ih:
 		case 176: /* CP_INT in ring buffer */
 		case 177: /* CP_INT in IB1 */
 		case 178: /* CP_INT in IB2 */
-			DRM_DEBUG("IH: CP int: 0x%08x\n", src_data);
+			printk(KERN_INFO "IH: CP int: 0x%08x\n", src_data);
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 181: /* CP EOP event */
-			DRM_DEBUG("IH: CP EOP\n");
+			printk(KERN_INFO "IH: CP EOP\n");
 			if (rdev->family >= CHIP_CAYMAN) {
 				switch (src_data) {
 				case 0:
@@ -3104,12 +3104,12 @@ restart_ih:
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 233: /* GUI IDLE */
-			DRM_DEBUG("IH: GUI idle\n");
+			printk(KERN_INFO "IH: GUI idle\n");
 			rdev->pm.gui_idle = true;
 			wake_up(&rdev->irq.idle_queue);
 			break;
 		default:
-			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+			printk(KERN_INFO "Unhandled interrupt: %d %d\n", src_id, src_data);
 			break;
 		}
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 2859406..fb0eafd 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1348,6 +1348,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
+int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *filp);
 
 /* VRAM scratch page for HDP bug, default vram page */
 struct r600_vram_scratch {
@@ -1444,6 +1446,7 @@ struct radeon_device {
 	struct radeon_mman		mman;
 	rwlock_t			fence_lock;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
+	wait_queue_head_t		userspace_fence_wait_queue;
 	struct radeon_semaphore_driver	semaphore_drv;
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	struct radeon_ib_pool		ib_pool;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 0afb13b..dcf11e5 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -730,6 +730,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->vram_mutex);
 	rwlock_init(&rdev->fence_lock);
+	init_waitqueue_head(&rdev->userspace_fence_wait_queue);
 	rwlock_init(&rdev->semaphore_drv.lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 64ea3dd..5b8270f 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -355,7 +355,10 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (wake) {
 		wake_up_all(&rdev->fence_drv[ring].queue);
+		printk(KERN_INFO "Woke kernel fences\n");
 	}
+	printk(KERN_INFO "Waking up all waiters\n");
+	wake_up_interruptible_all(&rdev->userspace_fence_wait_queue);
 }
 
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 7337850..6866f75 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -531,3 +531,73 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 {
 	return drm_gem_handle_delete(file_priv, handle);
 }
+
+int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *filp)
+{
+	struct drm_radeon_gem_wait_user_fence *args = data;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *robj;
+	void *buffer_data;
+	uint32_t *fence_data;
+	int r = 0;
+	long timeout;
+
+	printk(KERN_INFO "wait_user_fence offset %llu value %u timeout %llu\n", args->offset, args->value, args->timeout_usec);
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -ENOENT;
+	}
+	robj = gem_to_radeon_bo(gobj);
+
+	if (args->offset + 4 > gobj->size) {
+		printk(KERN_INFO "Offset too large\n");
+		r = -EINVAL;
+		goto unreference;
+	}
+
+	r = radeon_bo_reserve(robj, true);
+	if (r) {
+		printk(KERN_INFO "Reserve fail\n");
+		goto unreference;
+	}
+
+	r = radeon_bo_pin(robj, RADEON_GEM_DOMAIN_GTT, NULL);
+	if (r) {
+		printk(KERN_INFO "Pin fail\n");
+		goto unreserve;
+	}
+
+	r = radeon_bo_kmap(robj, &buffer_data);
+	if (r) {
+		printk(KERN_INFO "kmap fail\n");
+		goto unpin;
+	}
+
+	fence_data = (uint32_t *)buffer_data;
+
+	printk(KERN_INFO "Current data value %u\n", fence_data[args->offset >> 2]);
+
+	timeout = wait_event_interruptible_timeout(rdev->userspace_fence_wait_queue,
+						   fence_data[args->offset >> 2] != args->value,
+						   usecs_to_jiffies(args->timeout_usec));
+	if (timeout == 0)
+		r = -ETIMEDOUT;
+	else if (timeout < 0)
+		r = timeout;
+
+	printk(KERN_INFO "wait_user_fence offset %llu value %u timeout %llu\n", args->offset, args->value, args->timeout_usec);
+	printk(KERN_INFO "Finished data value %u\n", fence_data[args->offset >> 2]);
+
+	radeon_bo_kunmap(robj);
+unpin:
+	radeon_bo_unpin(robj);
+unreserve:
+	radeon_bo_unreserve(robj);
+unreference:
+	drm_gem_object_unreference_unlocked(gobj);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index d335288..0e552cc 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -496,5 +496,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_USER_FENCE, radeon_gem_wait_user_fence_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index dd2e9cf..c261c8c 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -510,6 +510,7 @@ typedef struct {
 #define DRM_RADEON_GEM_GET_TILING	0x29
 #define DRM_RADEON_GEM_BUSY		0x2a
 #define DRM_RADEON_GEM_VA		0x2b
+#define DRM_RADEON_GEM_WAIT_USER_FENCE	0x2c
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -552,6 +553,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
+#define DRM_IOCTL_RADEON_GEM_WAIT_USER_FENCE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_USER_FENCE, struct drm_radeon_gem_wait_user_fence)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -967,4 +969,30 @@ struct drm_radeon_info {
 	uint64_t		value;
 };
 
+/**
+ * struct drm_radeon_gem_wait_user_fence - DRM_RADEON_GEM_WAIT_USER_FENCE ioctl param
+ *
+ * @handle: Handle for the object that the GPU is expected to write
+ * @offset: Offset (in bytes) within that object where the GPU is expected
+ *          to write. Must be DWORD-aligned.
+ * @value: The value expected if the GPU has not yet written to this location
+ * @timeout_usec: The maximum time to wait for the GPU, in microseconds
+ *
+ * The DRM_RADEON_GEM_WAIT_USER_FENCE ioctl is meant to allow userspace to
+ * avoid busy-waiting for an EVENT_WRITE_EOP packet to complete (e.g. for
+ * fence sync objects in OpenGL). It expects the EVENT_WRITE_EOP packet to
+ * have requested an interrupt on completion.
+ *
+ * The ioctl will return immediately if the value supplied is not the value
+ * found in the buffer at offset bytes in; otherwise, it will sleep for up
+ * to timeout_usec, waking up whenever an EVENT_WRITE_EOP packet causes an
+ * interrupt and the value in the buffer might therefore have changed.
+ */
+struct drm_radeon_gem_wait_user_fence {
+	uint32_t	handle;
+	uint64_t	offset;
+	uint32_t	value;
+	uint64_t	timeout_usec;
+};
+
 #endif
-- 
1.7.6.4
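
For anyone who wants to try this from userspace, a caller might look roughly
like the sketch below (not part of the patch). It assumes libdrm's drmIoctl()
and a copy of the updated radeon_drm.h; radeon_wait_user_fence() and the
fd/handle/offset arguments are placeholders for whatever the application
currently spins on in its busy-wait loop.

/*
 * Minimal sketch of a userspace caller. The BO handle and the byte offset
 * identify the dword that the EVENT_WRITE_EOP packet will overwrite.
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "radeon_drm.h"

/*
 * Sleep until the dword at 'offset' in the BO no longer holds 'pre_value',
 * or until 'timeout_usec' has elapsed. Returns 0 on wakeup, negative errno
 * on failure (including -ETIMEDOUT).
 */
static int radeon_wait_user_fence(int fd, uint32_t bo_handle, uint64_t offset,
				  uint32_t pre_value, uint64_t timeout_usec)
{
	struct drm_radeon_gem_wait_user_fence args;

	memset(&args, 0, sizeof(args));
	args.handle = bo_handle;
	args.offset = offset;		/* must be DWORD-aligned */
	args.value = pre_value;		/* value before the EVENT_WRITE_EOP lands */
	args.timeout_usec = timeout_usec;

	if (drmIoctl(fd, DRM_IOCTL_RADEON_GEM_WAIT_USER_FENCE, &args) != 0)
		return -errno;
	return 0;
}

Because the value passed in is the pre-write value, the ioctl falls through
immediately if the EVENT_WRITE_EOP has already completed by the time
userspace gets here.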