[PATCH] r600g: Use a fake reloc to sleep for fences

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



r300g is able to sleep until a fence completes rather than busywait because
it creates a special buffer object and relocation that stays busy until the
CS containing the fence is finished.

Copy the idea into r600g, and use it to sleep if the user asked for an
infinite wait, falling back to busywaiting if the user provided a timeout.

Signed-off-by: Simon Farnsworth <simon.farnsworth@xxxxxxxxxxxx>
---
This is a userspace only fix for my problem, as suggested by Mark Olšák. It
doesn't fix the case with a timeout (which doesn't matter to me), but works
on existing kernels.

Given that my previous suggested fix opened a can of worms (mostly because I
don't fully understand modern Radeon hardware), this is probably enough of a
fix to apply for now, leaving a proper fix for someone with more
understanding of the way multiple rings interact.

 src/gallium/drivers/r600/r600.h            |    2 +-
 src/gallium/drivers/r600/r600_hw_context.c |   16 +++++++++++++++-
 src/gallium/drivers/r600/r600_pipe.c       |   10 +++++++++-
 src/gallium/drivers/r600/r600_pipe.h       |    1 +
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index baf09c1..bac4890 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -284,7 +284,7 @@ void r600_context_queries_resume(struct r600_context *ctx);
 void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
 			    int flag_wait);
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence,
-                             unsigned offset, unsigned value);
+                             unsigned offset, unsigned value, struct pb_buffer **sleep_bo);
 void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags);
 void r600_context_flush_dest_caches(struct r600_context *ctx);
 
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 8eb8e6d..cc81c48 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1603,7 +1603,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	}
 }
 
-void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
+void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value, struct pb_buffer **sleep_bo)
 {
 	uint64_t va;
 
@@ -1623,6 +1623,20 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
+
+	if (sleep_bo) {
+		unsigned reloc_index;
+		/* Create a dummy BO so that fence_finish without a timeout can sleep waiting for completion */
+		*sleep_bo = ctx->ws->buffer_create(ctx->ws, 1, 1,
+						   PIPE_BIND_CUSTOM,
+						   RADEON_DOMAIN_GTT);
+		/* Add the fence as a dummy relocation. */
+		reloc_index = ctx->ws->cs_add_reloc(ctx->cs,
+						    ctx->ws->buffer_get_cs_handle(*sleep_bo),
+						    RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
+		if (reloc_index >= ctx->creloc)
+			ctx->creloc = reloc_index+1;
+	}
 }
 
 static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index c38fbc5..71e31b1 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -115,7 +115,7 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx)
 	pipe_reference_init(&fence->reference, 1);
 
 	rscreen->fences.data[fence->index] = 0;
-	r600_context_emit_fence(&ctx->ctx, rscreen->fences.bo, fence->index, 1);
+	r600_context_emit_fence(&ctx->ctx, rscreen->fences.bo, fence->index, 1, &fence->sleep_bo);
 out:
 	pipe_mutex_unlock(rscreen->fences.mutex);
 	return fence;
@@ -605,6 +605,14 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
 	}
 
 	while (rscreen->fences.data[rfence->index] == 0) {
+		/* Special-case infinite timeout */
+		if (timeout == PIPE_TIMEOUT_INFINITE &&
+		    rfence->sleep_bo) {
+			rscreen->ws->buffer_wait(rfence->sleep_bo, RADEON_USAGE_READWRITE);
+			pb_reference(&rfence->sleep_bo, NULL);
+			continue;
+		}
+
 		if (++spins % 256)
 			continue;
 #ifdef PIPE_OS_UNIX
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 9ba60c8..5ca2528 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -170,6 +170,7 @@ struct r600_textures_info {
 struct r600_fence {
 	struct pipe_reference		reference;
 	unsigned			index; /* in the shared bo */
+	struct pb_buffer                *sleep_bo;
 	struct list_head		head;
 };
 
-- 
1.7.6.4

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux