As each poll fetches 16 bytes from memory, a GPU running
at 1GHz with the current setting of 200 wait cycle between fetches causes
80 MB/s of memory traffic just to check for new work when the GPU is
otherwise idle, which is more FE traffic than in some GPU loaded cases.
Significantly increase the number of wait cycles to slow down the poll
interval to ~30µs, limiting the FE idle memory traffic to 512 KB/s, while
providing a max latency which should not hurt most use-cases. The FE WAIT
command seems to have some unknown discrete steps in the wait cycles
Signed-off-by: Lucas Stach <l.stach@xxxxxxxxxxxxxx>
Reviewed-by: Christian Gmeiner <cgmeiner@xxxxxxxxxx>
---
drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 11 ++++++-----
drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 7 +++++++
drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 1 +
3 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index cf741c5c82d2..384df1659be6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -53,11 +53,12 @@ static inline void CMD_END(struct etnaviv_cmdbuf *buffer)
OUT(buffer, VIV_FE_END_HEADER_OP_END);
}
-static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)
+static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer,
+ unsigned int waitcycles)
{
buffer->user_size = ALIGN(buffer->user_size, 8);
- OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200);
+ OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | waitcycles);
}
static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
@@ -168,7 +169,7 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
/* initialize buffer */
buffer->user_size = 0;
- CMD_WAIT(buffer);
+ CMD_WAIT(buffer, gpu->fe_waitcycles);
CMD_LINK(buffer, 2,
etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+ buffer->user_size - 4);
@@ -320,7 +321,7 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
CMD_END(buffer);
/* Append waitlink */
- CMD_WAIT(buffer);
+ CMD_WAIT(buffer, gpu->fe_waitcycles);
CMD_LINK(buffer, 2,
etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+ buffer->user_size - 4);
@@ -503,7 +504,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
VIVS_GL_EVENT_FROM_PE);
- CMD_WAIT(buffer);
+ CMD_WAIT(buffer, gpu->fe_waitcycles);
CMD_LINK(buffer, 2,
etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+ buffer->user_size - 4);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 41aab1aa330b..8c20dff32240 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -493,6 +493,13 @@ static void etnaviv_gpu_update_clock(struct etnaviv_gpu *gpu)
clock |= VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale);
etnaviv_gpu_load_clock(gpu, clock);
}
+
+ /*
+ * Choose number of wait cycles to target a ~30us (1/32768) max latency
+ * until new work is picked up by the FE when it polls in the idle loop.
+ */
+ gpu->fe_waitcycles = min(gpu->base_rate_core >> (15 - gpu->freq_scale),
+ 0xffffUL);