The assumption that we are only using the first pipe no longer holds.
Instead, calculate the queue_mask from the queue_bitmap.

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 20 ++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 23 +++++++++++++++++++++--
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 90e1dd3..ff77351 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4697,60 +4697,76 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 /* KIQ functions */
 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 {
 	uint32_t tmp;
 	struct amdgpu_device *adev = ring->adev;
 
 	/* tell RLC which is KIQ queue */
 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
 	tmp &= 0xffffff00;
 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
 	tmp |= 0x80;
 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
 }
 
 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
 	uint32_t scratch, tmp = 0;
+	uint64_t queue_mask = 0;
 	int r, i;
 
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
+		}
+
+		queue_mask |= (1ull << i);
+	}
+
 	r = amdgpu_gfx_scratch_get(adev, &scratch);
 	if (r) {
 		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 
 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
 	if (r) {
 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 		amdgpu_gfx_scratch_free(adev, scratch);
 		return r;
 	}
 	/* set resources */
 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(kiq_ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(kiq_ring, 0);	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 
 		/* map queues */
 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_NUM_QUEUES(1));
 		amdgpu_ring_write(kiq_ring,
 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6208493..5a5ff47 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1895,46 +1895,65 @@ static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
 /* KIQ functions */
 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 {
 	uint32_t tmp;
 	struct amdgpu_device *adev = ring->adev;
 
 	/* tell RLC which is KIQ queue */
 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
 	tmp &= 0xffffff00;
 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
 	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 	tmp |= 0x80;
 	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
 static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
 {
+	int i;
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t queue_mask = 0;
+
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
+		}
+
+		queue_mask |= (1ull << i);
+	}
+
 	amdgpu_ring_alloc(ring, 8);
 	/* set resources */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
 	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(ring, 0);	/* queue mask hi */
+	amdgpu_ring_write(ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(ring, upper_32_bits(queue_mask));	/* queue mask hi */
 	amdgpu_ring_write(ring, 0);	/* gws mask lo */
 	amdgpu_ring_write(ring, 0);	/* gws mask hi */
 	amdgpu_ring_write(ring, 0);	/* oac mask */
 	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
 	amdgpu_ring_commit(ring);
 	udelay(50);
 }
 
 static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
 				      struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = kiq_ring->adev;
 	uint64_t mqd_addr, wptr_addr;
 
 	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
 	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 	amdgpu_ring_alloc(kiq_ring, 8);
 
 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
--
2.9.3
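
For readers who want the gist without the driver context: the core of the change is the loop that folds the compute queue_bitmap into the 64-bit queue mask written into PACKET3_SET_RESOURCES. Below is a minimal standalone sketch of that computation in plain C. build_queue_mask(), MAX_COMPUTE_QUEUES and the bool array are illustrative stand-ins for the kernel's test_bit()/queue_bitmap and AMDGPU_MAX_COMPUTE_QUEUES; the example bit layout assumes eight queues per pipe. None of this is part of the patch itself.

/*
 * Standalone sketch (not driver code) of the bitmap-to-mask computation
 * introduced by the patch above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_COMPUTE_QUEUES 64	/* stand-in for AMDGPU_MAX_COMPUTE_QUEUES */

static uint64_t build_queue_mask(const bool *queue_enabled, unsigned int nqueues)
{
	uint64_t queue_mask = 0;
	unsigned int i;

	for (i = 0; i < nqueues; ++i) {
		if (!queue_enabled[i])
			continue;

		/* A 64-bit mask can only describe queues 0..63. */
		if (i >= sizeof(queue_mask) * 8)
			break;

		queue_mask |= 1ull << i;
	}

	return queue_mask;
}

int main(void)
{
	bool enabled[MAX_COMPUTE_QUEUES] = { false };
	uint64_t mask;

	/* e.g. queues 0-1 of pipe 0 and queues 0-1 of pipe 1 (bits 8-9) */
	enabled[0] = enabled[1] = enabled[8] = enabled[9] = true;

	mask = build_queue_mask(enabled, MAX_COMPUTE_QUEUES);
	printf("queue mask lo: 0x%08x\n", (uint32_t)(mask & 0xffffffff));
	printf("queue mask hi: 0x%08x\n", (uint32_t)(mask >> 32));
	return 0;
}

The lo/hi printout mirrors the two dwords the driver emits with lower_32_bits()/upper_32_bits() in the SET_RESOURCES packet.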