Instead of taking the first pipe and giving the rest to kfd, take the first 2 queues of each pipe. Effectively, amdgpu and amdkfd own the same number of queues. But because the queues are spread over multiple pipes, the hardware will be able to better handle concurrent compute workloads. amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute thread to 4. amdkfd goes from 3 pipes to 4 pipes, i.e. from 3 compute threads to 4. v2: fix policy comment Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com> Acked-by: Christian König <christian.koenig at amd.com> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 06338e8..d5209d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2821,42 +2821,42 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) adev->gfx.mec.hpd_eop_obj = NULL; } } static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe, mec; /* policy for amdgpu compute queue ownership */ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { queue = i % adev->gfx.mec.num_queue_per_pipe; pipe = (i / adev->gfx.mec.num_queue_per_pipe) % adev->gfx.mec.num_pipe_per_mec; mec = (i / adev->gfx.mec.num_queue_per_pipe) / adev->gfx.mec.num_pipe_per_mec; /* we've run out of HW */ if (mec >= adev->gfx.mec.num_mec) break; - /* policy: amdgpu owns all queues in the first pipe */ - if (mec == 0 && pipe == 0) + /* policy: amdgpu owns the first two queues of the first MEC */ + if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); } /* update the number of active compute rings */ adev->gfx.num_compute_rings = bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* 
If you hit this case and edited the policy, you probably just * need to increase AMDGPU_MAX_COMPUTE_RINGS */ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; } static int gfx_v7_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; size_t mec_hpd_size; bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2178611..a5ba48b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1439,42 +1439,42 @@ static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); } static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe, mec; /* policy for amdgpu compute queue ownership */ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { queue = i % adev->gfx.mec.num_queue_per_pipe; pipe = (i / adev->gfx.mec.num_queue_per_pipe) % adev->gfx.mec.num_pipe_per_mec; mec = (i / adev->gfx.mec.num_queue_per_pipe) / adev->gfx.mec.num_pipe_per_mec; /* we've run out of HW */ if (mec >= adev->gfx.mec.num_mec) break; - /* policy: amdgpu owns all queues in the first pipe */ - if (mec == 0 && pipe == 0) + /* policy: amdgpu owns the first two queues of the first MEC */ + if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); } /* update the number of active compute rings */ adev->gfx.num_compute_rings = bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* If you hit this case and edited the policy, you probably just * need to increase AMDGPU_MAX_COMPUTE_RINGS */ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; } static int gfx_v8_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; size_t mec_hpd_size; bitmap_zero(adev->gfx.mec.queue_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES); -- 2.9.3