Provide a mechanism for userspace to wait for kernel fences, and teach the CS ioctl to return enough information to let userspace use this mechanism. This mechanism isn't safe to include as-is, as there is no way for the CS ioctl to indicate that it hasn't given you the information needed to let you wait for a kernel fence. I'm thus sending it out as a starting point in case someone else wants to continue digging into this. Signed-off-by: Simon Farnsworth <simon.farnsworth@xxxxxxxxxxxx> --- Note the FIXME - how does the CS ioctl indicate a fence failure? I'm also a little concerned that my general userspace API isn't flexible enough to match what we actually want - in particular, it would be nice for GL_ARB_sync to be able to plonk fences down scattered throughout the IB, and wait for that fence, not the whole CS. I couldn't easily determine whether it was safe to add a kernel fence to a userspace IB during parsing, though, so didn't attempt to do that. Finally, this mechanism (as compared to the wait mechanism I proposed in [PATCH] drm/radeon: Add support for userspace fence waits), doesn't do anything to protect us against malicious userspace causing interrupt storms - userspace can still fill an IB with EVENT_WRITE_EOP packets requesting an interrupt, and this mechanism allows userspace to ensure that the interrupt is enabled. Someone needs to decide if that's a risk worth worrying about - if so, the CS checkers need to reject attempts by userspace to request interrupts on completion. drivers/gpu/drm/radeon/radeon.h | 3 +++ drivers/gpu/drm/radeon/radeon_cs.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_fence.c | 16 ++++++++++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 1 + include/drm/radeon_drm.h | 15 +++++++++++++++ 5 files changed, 60 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b4cfb11..f26744a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -800,6 +800,7 @@ struct radeon_cs_parser { int chunk_ib_idx; int chunk_relocs_idx; int chunk_flags_idx; + int chunk_fence_idx; struct radeon_ib *ib; void *track; unsigned family; @@ -1348,6 +1349,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_fence_wait_value_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); /* VRAM scratch page for HDP bug, default vram page */ struct r600_vram_scratch { diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 435a3d9..e0e8165 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -163,6 +163,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) p->chunk_ib_idx = -1; p->chunk_relocs_idx = -1; p->chunk_flags_idx = -1; + p->chunk_fence_idx = -1; p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); if (p->chunks_array == NULL) { return -ENOMEM; @@ -207,6 +208,12 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) if (p->chunks[i].length_dw == 0) return -EINVAL; } + if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FENCE) { + p->chunk_fence_idx = i; + /* A fence chunk must be able to store the end-of-stream fence */ + if (p->chunks[i].length_dw != (sizeof(struct drm_radeon_fence_wait) / 4)) + return -EINVAL; + } p->chunks[i].length_dw = user_chunk.length_dw; p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data; @@ -279,6 +286,24 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; } +static void radeon_cs_parser_output_fence(struct radeon_cs_parser *parser) +{ + struct drm_radeon_fence_wait output; + void __user* chunk; + + memset(&output, 0, sizeof(struct drm_radeon_fence_wait)); + + if (parser->chunk_fence_idx != -1) { + chunk = (void __user*)(unsigned long)parser->chunks_array[parser->chunk_fence_idx]; + output.ring_token = parser->ib->fence->ring; + output.fence_token = parser->ib->fence->seq; + /* FIXME: What do I do about an error here? */ + DRM_COPY_TO_USER(chunk, + &output, + sizeof(struct drm_radeon_fence_wait)); + } +} + /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 85893c3..50bb054 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -356,6 +356,22 @@ int radeon_fence_wait_value(struct radeon_device *rdev, int ring, return r; } +int radeon_fence_wait_value_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct radeon_device *rdev = dev->dev_private; + struct drm_radeon_fence_wait *args = data; + int r; + + r = radeon_fence_wait_value(rdev, + args->ring_token, + args->fence_token, + usecs_to_jiffies(args->timeout_usec)); + if (r > 0) + r = 0; + + return r; +} struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) { kref_get(&fence->kref); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index d335288..a52db06 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -496,5 +496,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_FENCE_WAIT, radeon_fence_wait_value_ioctl, DRM_AUTH|DRM_UNLOCKED), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index dd2e9cf..d81ce94 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -510,6 +510,7 @@ typedef struct { #define DRM_RADEON_GEM_GET_TILING 0x29 #define DRM_RADEON_GEM_BUSY 0x2a #define DRM_RADEON_GEM_VA 0x2b +#define DRM_RADEON_FENCE_WAIT 0x2c #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -552,6 +553,7 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) #define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) +#define DRM_IOCTL_RADEON_FENCE_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_FENCE_WAIT, struct drm_radeon_fence_wait) typedef struct drm_radeon_init { enum { @@ -906,6 +908,12 @@ struct drm_radeon_gem_va { #define RADEON_CHUNK_ID_RELOCS 0x01 #define RADEON_CHUNK_ID_IB 0x02 #define RADEON_CHUNK_ID_FLAGS 0x03 +#define RADEON_CHUNK_ID_FENCE 0x04 + +/* A RADEON_CHUNK_ID_FENCE is a struct drm_radeon_fence_wait. If the CS is + * accepted, the kernel will fill in the fence_token and ring_token elements + * such that userspace just needs to fill in timeout_usec and call the + * DRM_RADEON_FENCE_WAIT ioctl to wait for this CS to complete. */ /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ #define RADEON_CS_KEEP_TILING_FLAGS 0x01 @@ -967,4 +975,11 @@ struct drm_radeon_info { uint64_t value; }; +struct drm_radeon_fence_wait { + uint64_t fence_token; + uint64_t timeout_usec; + uint32_t ring_token; + uint32_t padding; +}; + #endif -- 1.7.6.4 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel