Userspace currently busywaits for fences to complete; on my workload, this busywait consumes 10% of the available CPU time. Provide an ioctl so that userspace can wait for an EOP interrupt that corresponds to a previous EVENT_WRITE_EOP. Signed-off-by: Simon Farnsworth <simon.farnsworth@xxxxxxxxxxxx> --- I've been working on top of Jerome's tiling patches, so this doesn't apply directly on top of current upstream kernels. I can easily rebase to another version upon request - just point me to a git tree. My goal is to remove the sched_yield in Mesa's r600_fence_finish given a sufficiently up-to-date kernel; I hope, though, that the interface is clean enough for other users to extend it in the future (e.g. using compute rings). drivers/gpu/drm/radeon/radeon.h | 3 + drivers/gpu/drm/radeon/radeon_drv.c | 3 +- drivers/gpu/drm/radeon/radeon_fence.c | 2 + drivers/gpu/drm/radeon/radeon_gem.c | 70 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 1 + include/drm/radeon_drm.h | 30 ++++++++++++++ 6 files changed, 108 insertions(+), 1 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 2859406..00c187b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -217,6 +217,7 @@ struct radeon_fence_driver { unsigned long last_jiffies; unsigned long last_timeout; wait_queue_head_t queue; + wait_queue_head_t userspace_queue; struct list_head created; struct list_head emitted; struct list_head signaled; @@ -1348,6 +1349,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); /* VRAM scratch page for HDP bug, default vram page */ struct r600_vram_scratch { diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 4ae2e1d..9f82fa9 
100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -56,9 +56,10 @@ * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS * 2.13.0 - virtual memory support * 2.14.0 - add evergreen tiling informations + * 2.15.0 - gem_wait_user_fence ioctl */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 14 +#define KMS_DRIVER_MINOR 15 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 64ea3dd..d86bc28 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -356,6 +356,7 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) if (wake) { wake_up_all(&rdev->fence_drv[ring].queue); } + wake_up_interruptible_all(&rdev->fence_drv[ring].userspace_queue); } int radeon_fence_count_emitted(struct radeon_device *rdev, int ring) @@ -421,6 +422,7 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted); INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled); init_waitqueue_head(&rdev->fence_drv[ring].queue); + init_waitqueue_head(&rdev->fence_drv[ring].userspace_queue); rdev->fence_drv[ring].initialized = false; } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 7337850..602274f 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -531,3 +531,73 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv, { return drm_gem_handle_delete(file_priv, handle); } + +int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_wait_user_fence *args = data; + struct radeon_device *rdev = dev->dev_private; + struct drm_gem_object *gobj; + struct radeon_bo *robj; + void *buffer_data; + uint32_t 
*fence_data; + int r = 0; + long timeout; + int ring = RADEON_RING_TYPE_GFX_INDEX; + + /* If you're implementing this for other rings, you'll want to share + code with radeon_cs_get_ring in radeon_cs.c */ + if (args->ring != RADEON_CS_RING_GFX) { + return -EINVAL; + } + + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) { + return -ENOENT; + } + robj = gem_to_radeon_bo(gobj); + + if (gobj->size < args->offset) { + r = -EINVAL; + goto unreference; + } + + r = radeon_bo_reserve(robj, true); + if (r) { + goto unreference; + } + + r = radeon_bo_pin(robj, RADEON_GEM_DOMAIN_GTT, NULL); + if (r) { + goto unreserve; + } + + r = radeon_bo_kmap(robj, &buffer_data); + if (r) { + goto unpin; + } + + radeon_irq_kms_sw_irq_get(rdev, ring); + + fence_data = (uint32_t*)buffer_data; + timeout = wait_event_interruptible_timeout(rdev->fence_drv[ring].userspace_queue, + fence_data[args->offset >> 2] != args->value, + usecs_to_jiffies(args->timeout_usec)); + + radeon_irq_kms_sw_irq_put(rdev, ring); + + if (timeout == 0) + r = -ETIMEDOUT; + else if (timeout < 0) + r = timeout; + + radeon_bo_kunmap(robj); +unpin: + radeon_bo_unpin(robj); +unreserve: + radeon_bo_unreserve(robj); +unreference: + drm_gem_object_unreference_unlocked(gobj); + + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index d335288..0e552cc 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -496,5 +496,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_USER_FENCE, radeon_gem_wait_user_fence_ioctl, DRM_AUTH|DRM_UNLOCKED), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git 
a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index dd2e9cf..3d4ae93 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -510,6 +510,7 @@ typedef struct { #define DRM_RADEON_GEM_GET_TILING 0x29 #define DRM_RADEON_GEM_BUSY 0x2a #define DRM_RADEON_GEM_VA 0x2b +#define DRM_RADEON_GEM_WAIT_USER_FENCE 0x2c #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -552,6 +553,7 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) #define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) +#define DRM_IOCTL_RADEON_GEM_WAIT_USER_FENCE DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_USER_FENCE, struct drm_radeon_gem_wait_user_fence) typedef struct drm_radeon_init { enum { @@ -967,4 +969,32 @@ struct drm_radeon_info { uint64_t value; }; +/** + * struct drm_radeon_gem_wait_user_fence - DRM_RADEON_GEM_WAIT_USER_FENCE ioctl param + * + * @handle: Handle for the object that the GPU is expected to write + * @ring: The ring on which the fence packet was issued + * @offset: Offset (in bytes) within that object where the GPU is expected + * to write. Must be DWORD-aligned + * @value: The value expected if the GPU has not yet written to this location + * @timeout_usec: The maximum time to wait for the GPU, in microseconds + * + * The DRM_RADEON_GEM_WAIT_USER_FENCE ioctl is meant to allow userspace to + * avoid busy-waiting for an EVENT_WRITE_EOP packet to complete (e.g. for + * fence sync objects in OpenGL). It expects the EVENT_WRITE_EOP packet to + * have requested an interrupt on completion. 
+ * + * The ioctl will return immediately if the value supplied is not the value + * found in the buffer at offset bytes in; otherwise, it will sleep for up + * to timeout_usec, waking up when an EVENT_WRITE_EOP packet causes an + * interrupt and the value in the buffer might have changed. + */ +struct drm_radeon_gem_wait_user_fence { + uint32_t handle; + uint32_t ring; + uint64_t offset; + uint32_t value; + uint64_t timeout_usec; +}; + #endif -- 1.7.6.4 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel