Instead of busywaiting for the GPU to finish a fence, use the new kernel interface to wait for fence completion. If the new kernel interface is unavailable, fall back to busywaiting. Signed-off-by: Simon Farnsworth <simon.farnsworth@xxxxxxxxxxxx> --- This is simply addressing Michel's review comments against the v1 patch. There is ongoing debate on the dri-devel list about the required kernel interface to make this patch useful; until that discussion is resolved, this patch should probably not be applied. src/gallium/drivers/r600/r600_hw_context.c | 2 +- src/gallium/drivers/r600/r600_pipe.c | 14 +++------ src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 34 +++++++++++++++++++++++++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 16 +++++++++++ 4 files changed, 56 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 8eb8e6d..35a57a7 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -1618,7 +1618,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ /* DATA_SEL | INT_EN | ADDRESS_HI */ - ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF); + ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (2 << 24) | ((va >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index c38fbc5..c03a176 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -595,7 +595,6 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen, struct r600_screen *rscreen = (struct r600_screen *)pscreen; struct r600_fence *rfence = (struct r600_fence*)fence; int64_t start_time = 0; - unsigned spins = 0; if (timeout != PIPE_TIMEOUT_INFINITE) { start_time = os_time_get(); @@ -605,16 +604,13 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen, } while (rscreen->fences.data[rfence->index] == 0) { - if (++spins % 256) - continue; -#ifdef PIPE_OS_UNIX - sched_yield(); -#else - os_time_sleep(10); -#endif + rscreen->ws->buffer_wait_fence(rscreen->fences.bo->buf, + rfence->index << 2, + 0, + timeout); if (timeout != PIPE_TIMEOUT_INFINITE && os_time_get() - start_time >= timeout) { - return FALSE; + return rscreen->fences.data[rfence->index] == 0 ? FALSE : TRUE; } } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 143dcf9..b83791d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -879,6 +879,36 @@ static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer) return bo->va; } +/* No kernel support for doing this faster - just spin */ +static void radeon_winsys_bo_wait_fence_nokernel(struct pb_buffer *buf, + unsigned offset, + uint32_t value, + uint64_t timeout) +{ +#ifdef PIPE_OS_UNIX + sched_yield(); +#else + os_time_sleep(10); +#endif +} + +static void radeon_winsys_bo_wait_fence(struct pb_buffer *_buf, + unsigned offset, + uint32_t value, + uint64_t timeout) +{ + struct radeon_bo *bo = get_radeon_bo(_buf); + struct drm_radeon_gem_wait_user_fence args; + memset(&args, 0, sizeof(args)); + args.handle = bo->handle; + args.ring = RADEON_CS_RING_GFX; + args.offset = offset; + args.value = value; + args.timeout_usec = timeout; + while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_USER_FENCE, + &args, sizeof(args)) == -EBUSY); +} + void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) { ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; @@ -892,4 +922,8 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; ws->base.buffer_get_virtual_address = radeon_winsys_bo_va; + if (ws->info.drm_major == 2 && ws->info.drm_minor >= 15) + ws->base.buffer_wait_fence = radeon_winsys_bo_wait_fence; + else + ws->base.buffer_wait_fence = radeon_winsys_bo_wait_fence_nokernel; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index e462e86..869961f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -264,6 +264,22 @@ struct radeon_winsys { */ uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf); + /** + * Wait until a fence (EVENT_WRITE_EOP typically) has had a chance to + * write to a buffer. NB: there is no guarantee that the GPU has written + * to the buffer when this call returns, merely that it has had an + * opportunity to do so. + * + * \param buf A winsys buffer object + * \param offset Offset in bytes within the buffer that you expect to see changed - must be uint32_t aligned + * \param value The current value stored at offset + * \param timeout The maximum wait time, in microseconds + */ + void (*buffer_wait_fence)(struct pb_buffer *buf, + unsigned offset, + uint32_t value, + uint64_t timeout); + /************************************************************************** * Command submission. * -- 1.7.6.4 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel