Make amdgpu the owner of all per-pipe state of the HQDs. This change will allow us to split the queues between kfd and amdgpu with a queue granularity instead of pipe granularity. This patch fixes kfd allocating an HDP_EOP region for its 3 pipes which goes unused. v2: support for gfx9 Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net> Acked-by: Christian König <christian.koenig at amd.com> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 13 +------ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 28 ++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 33 +++++++++++----- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 24 ++++++++---- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 45 ---------------------- 7 files changed, 65 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f8a08d8..8187587 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -890,23 +890,23 @@ struct amdgpu_rlc { u32 *register_list_format; u32 *register_restore; }; struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; struct amdgpu_bo *mec_fw_obj; u64 mec_fw_gpu_addr; - u32 num_pipe; u32 num_mec; - u32 num_queue; + u32 num_pipe_per_mec; + u32 num_queue_per_pipe; void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; }; struct amdgpu_kiq { u64 eop_gpu_addr; struct amdgpu_bo *eop_obj; struct amdgpu_ring ring; struct amdgpu_irq_src irq; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 038b7ea..910f9d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -237,32 +237,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, /* Mapping vmid to pasid also for IH block */ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping); return 0; } static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); - - lock_srbm(kgd, mec, pipe, 0, 0); - WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8)); - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8)); - WREG32(mmCP_HPD_EOP_VMID, 0); - WREG32(mmCP_HPD_EOP_CONTROL, hpd_size); - unlock_srbm(kgd); - + /* amdgpu owns the per-pipe state */ return 0; } static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 2ecef3d..5843368 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -199,20 +199,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, /* Mapping vmid to pasid also for IH block */ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping); return 0; } static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr) { + /* amdgpu owns the per-pipe state */ return 0; } static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 629e3fb..3340012 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2821,34 +2821,48 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); adev->gfx.mec.hpd_eop_obj = NULL; } } static int gfx_v7_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; /* * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total * Nonetheless, we assign only 1 pipe because all other pipes will * be handled by KFD */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + switch (adev->asic_type) { + case CHIP_KAVERI: + adev->gfx.mec.num_mec = 2; + break; + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + default: + adev->gfx.mec.num_mec = 1; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec + * GFX7_MEC_HPD_SIZE * 2; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r; } } r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); @@ -2864,21 +2878,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) return r; } r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); if (r) { dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); gfx_v7_0_mec_fini(adev); return r; } /* clear memory. Not sure if this is required or not */ - memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2); + memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); return 0; } struct hqd_registers { u32 cp_mqd_base_addr; @@ -3202,23 +3216,23 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) { int r, i, j; u32 tmp; struct amdgpu_ring *ring; /* fix up chicken bits */ tmp = RREG32(mmCP_CPF_DEBUG); tmp |= (1 << 23); WREG32(mmCP_CPF_DEBUG, tmp); - /* init the pipes */ + /* init all pipes (even the ones we don't own) */ for (i = 0; i < adev->gfx.mec.num_mec; i++) - for (j = 0; j < adev->gfx.mec.num_pipe; j++) + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) gfx_v7_0_compute_pipe_init(adev, i, j); /* init the queues */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { r = gfx_v7_0_compute_queue_init(adev, i); if (r) { gfx_v7_0_cp_compute_fini(adev); return r; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ad08291..b34e597 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1414,32 +1414,47 @@ static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); } #define GFX8_MEC_HPD_SIZE 2048 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_POLARIS10: + case CHIP_CARRIZO: + adev->gfx.mec.num_mec = 2; + break; + case CHIP_TOPAZ: + case CHIP_STONEY: + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + + /* only 1 pipe of the first MEC is owned by amdgpu */ + mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r; } } r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); @@ -1454,21 +1469,21 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) gfx_v8_0_mec_fini(adev); return r; } r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); if (r) { dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); gfx_v8_0_mec_fini(adev); return r; } - memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE); + memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); return 0; } static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a447b70..5dcc1b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -476,34 +476,42 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) #define MEC_HPD_SIZE 2048 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; const __le32 *fw_data; unsigned fw_size; u32 *fw; + size_t mec_hpd_size; const struct gfx_firmware_header_v1_0 *mec_hdr; - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->gfx.mec.num_mec = 2; + break; + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + + /* only 1 pipe of the first MEC is owned by amdgpu */ + mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * MEC_HPD_SIZE; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r; } } r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f49c551..c064dea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -465,69 +465,24 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, ATC_VMID_PASID_MAPPING_VALID; return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( dqm->dev->kgd, pasid_mapping, vmid); } int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_num, unsigned int first_pipe) { - void *hpdptr; - struct mqd_manager *mqd; - unsigned int i, err, inx; - uint64_t pipe_hpd_addr; - BUG_ON(!dqm || !dqm->dev); pr_debug("kfd: In func %s\n", __func__); - /* - * Allocate memory for the HPDs. This is hardware-owned per-pipe data. - * The driver never accesses this memory after zeroing it. - * It doesn't even have to be saved/restored on suspend/resume - * because it contains no data when there are no active queues. - */ - - err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num, - &dqm->pipeline_mem); - - if (err) { - pr_err("kfd: error allocate vidmem num pipes: %d\n", - pipes_num); - return -ENOMEM; - } - - hpdptr = dqm->pipeline_mem->cpu_ptr; - dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr; - - memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num); - - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (mqd == NULL) { - kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); - return -ENOMEM; - } - - for (i = 0; i < pipes_num; i++) { - inx = i + first_pipe; - /* - * HPD buffer on GTT is allocated by amdkfd, no need to waste - * space in GTT for pipelines we don't initialize - */ - pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES; - pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr); - /* = log2(bytes/4)-1 */ - dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx, - CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr); - } - return 0; } static void init_interrupts(struct device_queue_manager *dqm) { unsigned int i; BUG_ON(dqm == NULL); for (i = 0 ; i < get_pipes_num(dqm) ; i++) -- 2.9.3