Userspace currently busywaits for fences to complete; on my workload,
this busywait consumes 10% of the available CPU time.

Provide an ioctl so that userspace can wait for an EOP interrupt that
corresponds to a previous EVENT_WRITE_EOP.

Signed-off-by: Simon Farnsworth <simon.farnsworth@xxxxxxxxxxxx>
---
There's ongoing debate about whether this is a sensible interface; I've
cleaned it up in accordance with Dave Airlie's review comments, so that
there's a nicer starting point if the debate ends up agreeing that this
is actually the sort of interface we want.

In the meantime, I'm going to leave this interface alone and work on
porting r300g's fence mechanism to r600g, using it to sleep instead of
spinning when fences are issued with an infinite timeout, but using the
existing busywait mechanism if a timeout is provided.

 drivers/gpu/drm/radeon/radeon.h       |    3 ++
 drivers/gpu/drm/radeon/radeon_drv.c   |    3 +-
 drivers/gpu/drm/radeon/radeon_fence.c |    2 +
 drivers/gpu/drm/radeon/radeon_gem.c   |   63 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c   |    1 +
 include/drm/radeon_drm.h              |   31 ++++++++++++++++
 6 files changed, 102 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 2859406..00c187b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -217,6 +217,7 @@ struct radeon_fence_driver {
 	unsigned long			last_jiffies;
 	unsigned long			last_timeout;
 	wait_queue_head_t		queue;
+	wait_queue_head_t		userspace_queue;
 	struct list_head		created;
 	struct list_head		emitted;
 	struct list_head		signaled;
@@ -1348,6 +1349,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
+int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *filp);
 
 /* VRAM scratch page for HDP bug, default vram page */
 struct r600_vram_scratch {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 4ae2e1d..9f82fa9 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -56,9 +56,10 @@
  * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS
  * 2.13.0 - virtual memory support
  * 2.14.0 - add evergreen tiling informations
+ * 2.15.0 - gem_wait_user_fence ioctl
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	14
+#define KMS_DRIVER_MINOR	15
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 64ea3dd..d86bc28 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -356,6 +356,7 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 	if (wake) {
 		wake_up_all(&rdev->fence_drv[ring].queue);
 	}
+	wake_up_interruptible_all(&rdev->fence_drv[ring].userspace_queue);
 }
 
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
@@ -421,6 +422,7 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
 	init_waitqueue_head(&rdev->fence_drv[ring].queue);
+	init_waitqueue_head(&rdev->fence_drv[ring].userspace_queue);
 	rdev->fence_drv[ring].initialized = false;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 7337850..765fc6d 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -531,3 +531,66 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 {
 	return drm_gem_handle_delete(file_priv, handle);
 }
+
+int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *filp)
+{
+	struct drm_radeon_gem_wait_user_fence *args = data;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *robj;
+	void *buffer_data;
+	uint32_t *fence_data;
+	int r = 0;
+	long timeout;
+	int ring = RADEON_RING_TYPE_GFX_INDEX;
+
+	/* If you're implementing this for other rings, you'll want to share
+	   code with radeon_cs_get_ring in radeon_cs.c */
+	if (args->ring != RADEON_CS_RING_GFX) {
+		return -EINVAL;
+	}
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -ENOENT;
+	}
+	robj = gem_to_radeon_bo(gobj);
+
+	if (gobj->size < args->offset) {
+		r = -EINVAL;
+		goto unreference;
+	}
+
+	r = radeon_bo_reserve(robj, true);
+	if (r) {
+		goto unreference;
+	}
+
+	r = radeon_bo_kmap(robj, &buffer_data);
+	if (r) {
+		goto unreserve;
+	}
+
+	radeon_irq_kms_sw_irq_get(rdev, ring);
+
+	fence_data = (uint32_t*)buffer_data;
+	timeout = wait_event_interruptible_timeout(rdev->fence_drv[ring].userspace_queue,
+						   fence_data[args->offset >> 2] != args->value,
+						   usecs_to_jiffies(args->timeout_usec));
+
+	radeon_irq_kms_sw_irq_put(rdev, ring);
+
+	if (timeout == 0)
+		r = -ETIMEDOUT;
+	else if (timeout < 0)
+		r = timeout;
+
+	radeon_bo_kunmap(robj);
+unreserve:
+	radeon_bo_unreserve(robj);
+unreference:
+	drm_gem_object_unreference_unlocked(gobj);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index d335288..0e552cc 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -496,5 +496,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_USER_FENCE, radeon_gem_wait_user_fence_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index dd2e9cf..c8b083d 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -510,6 +510,7 @@ typedef struct {
 #define DRM_RADEON_GEM_GET_TILING	0x29
 #define DRM_RADEON_GEM_BUSY		0x2a
 #define DRM_RADEON_GEM_VA		0x2b
+#define DRM_RADEON_GEM_WAIT_USER_FENCE	0x2c
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -552,6 +553,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
+#define DRM_IOCTL_RADEON_GEM_WAIT_USER_FENCE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_USER_FENCE, struct drm_radeon_gem_wait_user_fence)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -967,4 +969,33 @@ struct drm_radeon_info {
 	uint64_t		value;
 };
 
+/**
+ * struct drm_radeon_gem_wait_user_fence - DRM_RADEON_GEM_WAIT_USER_FENCE ioctl param
+ *
+ * @handle: Handle for the object that the GPU is expected to write
+ * @ring: The ring on which the fence packet was issued
+ * @offset: Offset (in bytes) within that object where the GPU is expected
+ *          to write. Must be DWORD-aligned
+ * @value: The value expected if the GPU has not yet written to this location
+ * @timeout_usec: The maximum time to wait for the GPU, in microseconds
+ *
+ * The DRM_RADEON_GEM_WAIT_USER_FENCE ioctl is meant to allow userspace to
+ * avoid busy-waiting for an EVENT_WRITE_EOP packet to complete (e.g. for
+ * fence sync objects in OpenGL). It expects the EVENT_WRITE_EOP packet to
+ * have requested an interrupt on completion.
+ *
+ * The ioctl will return immediately if the value supplied is not the value
+ * found in the buffer at offset bytes in; otherwise, it will sleep for up
+ * to timeout_usec, waking up when an EVENT_WRITE_EOP packet causes an
+ * interrupt and the value in the buffer might have changed.
+ */
+struct drm_radeon_gem_wait_user_fence {
+	uint64_t	timeout_usec;
+	uint64_t	offset;
+	uint32_t	value;
+	uint32_t	handle;
+	uint32_t	ring;
+	uint32_t	padding;
+};
+
 #endif
-- 
1.7.6.4
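For reference, here is a minimal sketch of how userspace might call the new
ioctl through libdrm's drmCommandWrite(). The wait_user_fence() helper, its
parameter names, and the surrounding error handling are illustrative
assumptions for this email, not part of the patch; only the struct, the
DRM_RADEON_GEM_WAIT_USER_FENCE command index, and RADEON_CS_RING_GFX come
from the code above.

/*
 * Illustrative userspace sketch (not part of the patch): sleep until the
 * GPU has overwritten the fence value that EVENT_WRITE_EOP was asked to
 * write, instead of busy-waiting on the mapped buffer.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "radeon_drm.h"

static int wait_user_fence(int fd, uint32_t bo_handle, uint64_t offset,
			   uint32_t old_value, uint64_t timeout_usec)
{
	struct drm_radeon_gem_wait_user_fence args;

	memset(&args, 0, sizeof(args));
	args.handle = bo_handle;		/* GEM handle of the fence BO */
	args.ring = RADEON_CS_RING_GFX;		/* only the GFX ring is accepted */
	args.offset = offset;			/* DWORD-aligned offset into the BO */
	args.value = old_value;			/* value before the fence signals */
	args.timeout_usec = timeout_usec;

	/* 0 on wake-up, negative errno (e.g. -ETIMEDOUT) otherwise. */
	return drmCommandWrite(fd, DRM_RADEON_GEM_WAIT_USER_FENCE,
			       &args, sizeof(args));
}

As the cover letter suggests, a driver would likely only take this path for
effectively unbounded waits and keep the existing busywait for short,
bounded timeouts.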