Interesting!
Gamescope also uses a high priority dedicated compute queue.
I'll see if this improves things on Deck with FSR, and other (sad) times
we have to composite.
Have you looked at how things change with gpuvis at all by the way?
- Joshie 🐸✨
On 12/2/23 00:17, Friedrich Vock wrote:
This improves latency if the GPU is already busy with other work.
This is useful for VR compositors that submit highly latency-sensitive
compositing work on high-priority compute queues while the GPU is busy
rendering the next frame.
Userspace merge request:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462
Signed-off-by: Friedrich Vock <friedrich.vock@xxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 ++++++----
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++-
4 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9505dc8f9d69..4b923a156c4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -790,6 +790,7 @@ struct amdgpu_mqd_prop {
uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority;
uint32_t hqd_queue_priority;
+ bool allow_tunneling;
bool hqd_active;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 231d49132a56..4d98e8879be8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -620,6 +620,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
struct amdgpu_mqd_prop *prop)
{
struct amdgpu_device *adev = ring->adev;
+ bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+ bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
memset(prop, 0, sizeof(*prop));
@@ -637,10 +641,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
*/
prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
- if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
- amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
- (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
- amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+ prop->allow_tunneling = is_high_prio_compute;
+ if (is_high_prio_compute || is_high_prio_gfx) {
prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c8a3bf01743f..73f6d7e72c73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index c659ef0f47ce..bdcf96df69e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -3847,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp;
--
2.43.0