Instead of picking an arbitrary queue for KIQ, search for one according to policy. The queue must be unused. Also report the KIQ as an unavailable resource to KFD. In testing I ran into KCQ initialization issues when using pipes 2/3 of MEC2 for the KIQ. Therefore the policy disallows grabbing one of these. v2: fix (ring.me + 1) to (ring.me - 1) in amdgpu_amdkfd_device_init v3: use a pre-decrement in the kiq_acquire search loops so that bit -1 is never tested, and drop the leftover ring->queue = 0 in gfx_v9_0_kiq_init_ring that overrode the acquired queue Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 23 +++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 43 ++++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 43 ++++++++++++++++++++++++----- 4 files changed, 98 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f2929041c85..a02b08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1761,51 +1761,68 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr ring->count_dw -= count_dw; } } static inline struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) if (&adev->sdma.instance[i].ring == ring) break; if (i < AMDGPU_MAX_SDMA_INSTANCES) return &adev->sdma.instance[i]; else return NULL; } -static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev, - int mec, int pipe, int queue) +static inline int amdgpu_queue_to_bit(struct amdgpu_device *adev, + int mec, int pipe, int queue) { int bit = 0; bit += mec * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; bit += pipe * adev->gfx.mec.num_queue_per_pipe; bit += queue; - return test_bit(bit, adev->gfx.mec.queue_bitmap); + return bit; +} + +static inline void amdgpu_bit_to_queue(struct amdgpu_device *adev, int bit, + int *mec, int 
*pipe, int *queue) +{ + *queue = bit % adev->gfx.mec.num_queue_per_pipe; + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + +} +static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + return test_bit(amdgpu_queue_to_bit(adev, mec, pipe, queue), + adev->gfx.mec.queue_bitmap); } /* * ASICs macro. */ #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state)) #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev)) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) #define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev)) #define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l)) #define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev)) #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 8fc5aa3..339e8cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -94,40 +94,48 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, .num_mec = adev->gfx.mec.num_mec, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe }; /* this is going to have a few of the MSBs set that we need to * clear */ bitmap_complement(gpu_resources.queue_bitmap, adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); + /* remove the KIQ bit as well */ + if (adev->gfx.kiq.ring.ready) + clear_bit(amdgpu_queue_to_bit(adev, + adev->gfx.kiq.ring.me - 1, + adev->gfx.kiq.ring.pipe, + adev->gfx.kiq.ring.queue), + gpu_resources.queue_bitmap); + /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ last_valid_bit = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) clear_bit(i, gpu_resources.queue_bitmap); amdgpu_doorbell_get_kfd_info(adev, &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); kgd2kfd->device_init(adev->kfd, &gpu_resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { if (adev->kfd) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index d9c6867..487a848 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1356,64 +1356,91 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) } return 0; } static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { int r; if 
(adev->gfx.mec.hpd_eop_obj) { r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); adev->gfx.mec.hpd_eop_obj = NULL; } } +static int gfx_v8_0_kiq_acquire(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int queue_bit; + int mec, pipe, queue; + + queue_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + + while (--queue_bit >= 0) { + if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) + continue; + + amdgpu_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + + /* Using pipes 2/3 from MEC 2 seems to cause problems */ + if (mec == 1 && pipe > 1) + continue; + + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + return 0; + } + + dev_err(adev->dev, "Failed to find a queue for KIQ\n"); + return -EINVAL; +} + static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; int r = 0; r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); if (r) return r; ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; - } - ring->queue = 0; + r = gfx_v8_0_kiq_acquire(adev, ring); + if (r) + return r; + ring->eop_gpu_addr = kiq->eop_gpu_addr; sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); return r; } static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); } 
static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe, mec; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6ff002d..b1d3ce7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -631,62 +631,89 @@ static int gfx_v9_0_kiq_init(struct amdgpu_device *adev) r = amdgpu_bo_create_kernel(adev, GFX9_MEC_HPD_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, &kiq->eop_gpu_addr, (void **)&hpd); if (r) { dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); return r; } memset(hpd, 0, GFX9_MEC_HPD_SIZE); r = amdgpu_bo_reserve(kiq->eop_obj, false); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); amdgpu_bo_kunmap(kiq->eop_obj); amdgpu_bo_unreserve(kiq->eop_obj); return 0; } +static int gfx_v9_0_kiq_acquire(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int queue_bit; + int mec, pipe, queue; + + queue_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + + while (--queue_bit >= 0) { + if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) + continue; + + amdgpu_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + + /* Using pipes 2/3 from MEC 2 seems to cause problems */ + if (mec == 1 && pipe > 1) + continue; + + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + return 0; + } + + dev_err(adev->dev, "Failed to find a queue for KIQ\n"); + return -EINVAL; +} + static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; int r = 0; r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); if (r) return r; ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; - } + + r = 
gfx_v9_0_kiq_acquire(adev, ring); + if (r) + return r; - ring->queue = 0; ring->eop_gpu_addr = kiq->eop_gpu_addr; sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); if (r) dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); return r; } static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); } /* create MQD for each compute queue */ static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev) -- 2.9.3