On Tue, Apr 4, 2017 at 11:16 PM, Alex Deucher <alexdeucher at gmail.com> wrote: > On Tue, Apr 4, 2017 at 6:05 PM, Andres Rodriguez <andresx7 at gmail.com> wrote: >> Previously the queue/pipe split with kfd operated with pipe >> granularity. This patch allows amdgpu to take ownership of an arbitrary >> set of queues. >> >> It also consolidates the last few magic numbers in the compute >> initialization process into mec_init. >> >> v2: support for gfx9 >> >> Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net> >> Acked-by: Christian König <christian.koenig at amd.com> >> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++ >> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 83 +++++++++++++++++------- >> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 81 +++++++++++++++++++----- >> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 84 +++++++++++++++++++++++-- >> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 1 + >> 5 files changed, 212 insertions(+), 44 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index b92f6cb..e2d8243 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -39,20 +39,22 @@ >> #include <ttm/ttm_bo_api.h> >> #include <ttm/ttm_bo_driver.h> >> #include <ttm/ttm_placement.h> >> #include <ttm/ttm_module.h> >> #include <ttm/ttm_execbuf_util.h> >> >> #include <drm/drmP.h> >> #include <drm/drm_gem.h> >> #include <drm/amdgpu_drm.h> >> >> +#include <kgd_kfd_interface.h> >> + >> #include "amd_shared.h" >> #include "amdgpu_mode.h" >> #include "amdgpu_ih.h" >> #include "amdgpu_irq.h" >> #include "amdgpu_ucode.h" >> #include "amdgpu_ttm.h" >> #include "amdgpu_psp.h" >> #include "amdgpu_gds.h" >> #include "amdgpu_sync.h" >> #include "amdgpu_ring.h" >> @@ -899,29 +901,34 @@ struct amdgpu_rlc { >> u32 reg_list_format_start; >> u32 reg_list_format_separate_start; >> u32 starting_offsets_start; >> u32 
reg_list_format_size_bytes; >> u32 reg_list_size_bytes; >> >> u32 *register_list_format; >> u32 *register_restore; >> }; >> >> +#define AMDGPU_MAX_QUEUES KGD_MAX_QUEUES > > Can we rename this to AMDGPU_MAX_COMPUTE_QUEUES or better yet, unify > it with AMDGPU_MAX_COMPUTE_RINGS? I don't like having two defines for > the same thing. > I'm okay with the rename here. Since the meaning is slightly different, unifying them would have some undesired consequences. AMDGPU_MAX_QUEUES is the highest number of queues an ASIC may contain (with some wiggle room). On the other hand, AMDGPU_MAX_COMPUTE_RINGS is the maximum number of queues we expect amdgpu to acquire. If we unified them, some arrays would increase in size unnecessarily, e.g. mqd_backup[8 + 1] would become mqd_backup[128 + 1]. You seem to be pointing at the problem here correctly though: the names don't convey that difference. >> + >> struct amdgpu_mec { >> struct amdgpu_bo *hpd_eop_obj; >> u64 hpd_eop_gpu_addr; >> struct amdgpu_bo *mec_fw_obj; >> u64 mec_fw_gpu_addr; >> u32 num_mec; >> u32 num_pipe_per_mec; >> u32 num_queue_per_pipe; >> void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; >> + >> + /* These are the resources for which amdgpu takes ownership */ >> + DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_QUEUES); >> }; >> >> struct amdgpu_kiq { >> u64 eop_gpu_addr; >> struct amdgpu_bo *eop_obj; >> struct amdgpu_ring ring; >> struct amdgpu_irq_src irq; >> }; >> >> /* >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> index 3340012..0586f1c 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> @@ -42,21 +42,20 @@ >> #include "gca/gfx_7_2_enum.h" >> #include "gca/gfx_7_2_sh_mask.h" >> >> #include "gmc/gmc_7_0_d.h" >> #include "gmc/gmc_7_0_sh_mask.h" >> >> #include "oss/oss_2_0_d.h" >> #include "oss/oss_2_0_sh_mask.h" >> >> #define GFX7_NUM_GFX_RINGS 1 >> -#define GFX7_NUM_COMPUTE_RINGS 8 > > Can we keep the local compute
queue NUM defines? This way we can > change the number of queues per gfx version without affecting > everything. > > Alex > These defines don't affect how many queues amdgpu will grab. Instead, this can be configured per gfx version in gfx_vX_Y_compute_queue_acquire(). If a maximum is desired, it can be configured within the policy. Andres >> #define GFX7_MEC_HPD_SIZE 2048 >> >> >> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); >> static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); >> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); >> >> MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); >> MODULE_FIRMWARE("radeon/bonaire_me.bin"); >> MODULE_FIRMWARE("radeon/bonaire_ce.bin"); >> @@ -2817,47 +2816,79 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) >> if (unlikely(r != 0)) >> dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); >> amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); >> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); >> >> amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); >> adev->gfx.mec.hpd_eop_obj = NULL; >> } >> } >> >> +static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev) >> +{ >> + int i, queue, pipe, mec; >> + >> + /* policy for amdgpu compute queue ownership */ >> + for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { >> + queue = i % adev->gfx.mec.num_queue_per_pipe; >> + pipe = (i / adev->gfx.mec.num_queue_per_pipe) >> + % adev->gfx.mec.num_pipe_per_mec; >> + mec = (i / adev->gfx.mec.num_queue_per_pipe) >> + / adev->gfx.mec.num_pipe_per_mec; >> + >> + /* we've run out of HW */ >> + if (mec > adev->gfx.mec.num_mec) >> + break; >> + >> + /* policy: amdgpu owns all queues in the first pipe */ >> + if (mec == 0 && pipe == 0) >> + set_bit(i, adev->gfx.mec.queue_bitmap); >> + } >> + >> + /* update the number of active compute rings */ >> + adev->gfx.num_compute_rings = >> + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES); >> + >> + /* If you hit this case and edited the policy, you
probably just >> + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ >> + WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS); >> + if (adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS) >> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; >> +} >> + >> static int gfx_v7_0_mec_init(struct amdgpu_device *adev) >> { >> int r; >> u32 *hpd; >> size_t mec_hpd_size; >> >> - /* >> - * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total >> - * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total >> - * Nonetheless, we assign only 1 pipe because all other pipes will >> - * be handled by KFD >> - */ >> + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES); >> + >> switch (adev->asic_type) { >> case CHIP_KAVERI: >> adev->gfx.mec.num_mec = 2; >> break; >> case CHIP_BONAIRE: >> case CHIP_HAWAII: >> case CHIP_KABINI: >> case CHIP_MULLINS: >> default: >> adev->gfx.mec.num_mec = 1; >> break; >> } >> adev->gfx.mec.num_pipe_per_mec = 4; >> adev->gfx.mec.num_queue_per_pipe = 8; >> >> + /* take ownership of the relevant compute queues */ >> + gfx_v7_0_compute_queue_acquire(adev); >> + >> + /* allocate space for ALL pipes (even the ones we don't own) */ >> mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec >> * GFX7_MEC_HPD_SIZE * 2; >> if (adev->gfx.mec.hpd_eop_obj == NULL) { >> r = amdgpu_bo_create(adev, >> mec_hpd_size, >> PAGE_SIZE, true, >> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> &adev->gfx.mec.hpd_eop_obj); >> if (r) { >> dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); >> @@ -4522,21 +4553,21 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { >> static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { >> .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, >> .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode >> }; >> >> static int gfx_v7_0_early_init(void *handle) >> { >> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >> >> adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; >> - 
adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS; >> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; >> adev->gfx.funcs = &gfx_v7_0_gfx_funcs; >> adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; >> gfx_v7_0_set_ring_funcs(adev); >> gfx_v7_0_set_irq_funcs(adev); >> gfx_v7_0_set_gds_init(adev); >> >> return 0; >> } >> >> static int gfx_v7_0_late_init(void *handle) >> @@ -4718,21 +4749,21 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) >> gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); >> break; >> } >> adev->gfx.config.gb_addr_config = gb_addr_config; >> } >> >> static int gfx_v7_0_sw_init(void *handle) >> { >> struct amdgpu_ring *ring; >> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >> - int i, r; >> + int i, r, ring_id; >> >> /* EOP Event */ >> r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); >> if (r) >> return r; >> >> /* Privileged reg */ >> r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, >> &adev->gfx.priv_reg_irq); >> if (r) >> @@ -4769,42 +4800,52 @@ static int gfx_v7_0_sw_init(void *handle) >> ring = &adev->gfx.gfx_ring[i]; >> ring->ring_obj = NULL; >> sprintf(ring->name, "gfx"); >> r = amdgpu_ring_init(adev, ring, 1024, >> &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); >> if (r) >> return r; >> } >> >> /* set up the compute queues */ >> - for (i = 0; i < adev->gfx.num_compute_rings; i++) { >> + for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) { >> unsigned irq_type; >> >> - /* max 32 queues per MEC */ >> - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { >> - DRM_ERROR("Too many (%d) compute rings!\n", i); >> - break; >> - } >> - ring = &adev->gfx.compute_ring[i]; >> + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) >> + continue; >> + >> + ring = &adev->gfx.compute_ring[ring_id]; >> + >> + /* mec0 is me1 */ >> + ring->me = ((i / adev->gfx.mec.num_queue_per_pipe) >> + / adev->gfx.mec.num_pipe_per_mec) >> + + 1; >> + ring->pipe = (i / 
adev->gfx.mec.num_queue_per_pipe) >> + % adev->gfx.mec.num_pipe_per_mec; >> + ring->queue = i % adev->gfx.mec.num_queue_per_pipe; >> + >> ring->ring_obj = NULL; >> ring->use_doorbell = true; >> - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; >> - ring->me = 1; /* first MEC */ >> - ring->pipe = i / 8; >> - ring->queue = i % 8; >> + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; >> sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); >> - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; >> + >> + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP >> + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) >> + + ring->pipe; >> + >> /* type-2 packets are deprecated on MEC, use type-3 instead */ >> r = amdgpu_ring_init(adev, ring, 1024, >> &adev->gfx.eop_irq, irq_type); >> if (r) >> return r; >> + >> + ring_id++; >> } >> >> /* reserve GDS, GWS and OA resource for gfx */ >> r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, >> PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, >> &adev->gds.gds_gfx_bo, NULL, NULL); >> if (r) >> return r; >> >> r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> index 86cdcb8..177992c 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> @@ -45,21 +45,20 @@ >> #include "gca/gfx_8_0_enum.h" >> #include "gca/gfx_8_0_sh_mask.h" >> #include "gca/gfx_8_0_enum.h" >> >> #include "dce/dce_10_0_d.h" >> #include "dce/dce_10_0_sh_mask.h" >> >> #include "smu/smu_7_1_3_d.h" >> >> #define GFX8_NUM_GFX_RINGS 1 >> -#define GFX8_NUM_COMPUTE_RINGS 8 >> #define GFX8_MEC_HPD_SIZE 2048 >> >> >> #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 >> #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 >> #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 >> #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 >> >> #define ARRAY_MODE(x) ((x) << 
GB_TILE_MODE0__ARRAY_MODE__SHIFT) >> #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) >> @@ -1410,47 +1409,82 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, >> } >> static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, >> struct amdgpu_irq_src *irq) >> { >> amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); >> amdgpu_ring_fini(ring); >> } >> >> #define GFX8_MEC_HPD_SIZE 2048 >> >> +static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev) >> +{ >> + int i, queue, pipe, mec; >> + >> + /* policy for amdgpu compute queue ownership */ >> + for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { >> + queue = i % adev->gfx.mec.num_queue_per_pipe; >> + pipe = (i / adev->gfx.mec.num_queue_per_pipe) >> + % adev->gfx.mec.num_pipe_per_mec; >> + mec = (i / adev->gfx.mec.num_queue_per_pipe) >> + / adev->gfx.mec.num_pipe_per_mec; >> + >> + /* we've run out of HW */ >> + if (mec > adev->gfx.mec.num_mec) >> + break; >> + >> + /* policy: amdgpu owns all queues in the first pipe */ >> + if (mec == 0 && pipe == 0) >> + set_bit(i, adev->gfx.mec.queue_bitmap); >> + } >> + >> + /* update the number of active compute rings */ >> + adev->gfx.num_compute_rings = >> + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES); >> + >> + /* If you hit this case and edited the policy, you probably just >> + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ >> + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) >> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; >> +} >> + >> static int gfx_v8_0_mec_init(struct amdgpu_device *adev) >> { >> int r; >> u32 *hpd; >> size_t mec_hpd_size; >> >> + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES); >> + >> switch (adev->asic_type) { >> case CHIP_FIJI: >> case CHIP_TONGA: >> case CHIP_POLARIS11: >> case CHIP_POLARIS12: >> case CHIP_POLARIS10: >> case CHIP_CARRIZO: >> adev->gfx.mec.num_mec = 2; >> break; >> case CHIP_TOPAZ: >> case CHIP_STONEY: >> default: >> 
adev->gfx.mec.num_mec = 1; >> break; >> } >> >> adev->gfx.mec.num_pipe_per_mec = 4; >> adev->gfx.mec.num_queue_per_pipe = 8; >> >> - /* only 1 pipe of the first MEC is owned by amdgpu */ >> - mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE; >> + /* take ownership of the relevant compute queues */ >> + gfx_v8_0_compute_queue_acquire(adev); >> + >> + mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; >> >> if (adev->gfx.mec.hpd_eop_obj == NULL) { >> r = amdgpu_bo_create(adev, >> mec_hpd_size, >> PAGE_SIZE, true, >> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> &adev->gfx.mec.hpd_eop_obj); >> if (r) { >> dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); >> return r; >> @@ -2083,21 +2117,21 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) >> gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); >> break; >> } >> adev->gfx.config.gb_addr_config = gb_addr_config; >> >> return 0; >> } >> >> static int gfx_v8_0_sw_init(void *handle) >> { >> - int i, r; >> + int i, r, ring_id; >> struct amdgpu_ring *ring; >> struct amdgpu_kiq *kiq; >> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >> >> /* KIQ event */ >> r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); >> if (r) >> return r; >> >> /* EOP Event */ >> @@ -2150,43 +2184,56 @@ static int gfx_v8_0_sw_init(void *handle) >> ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; >> } >> >> r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, >> AMDGPU_CP_IRQ_GFX_EOP); >> if (r) >> return r; >> } >> >> /* set up the compute queues */ >> - for (i = 0; i < adev->gfx.num_compute_rings; i++) { >> + for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) { >> unsigned irq_type; >> >> - /* max 32 queues per MEC */ >> - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { >> - DRM_ERROR("Too many (%d) compute rings!\n", i); >> + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) >> + continue; >> + >> + if 
(WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS)) >> break; >> - } >> - ring = &adev->gfx.compute_ring[i]; >> + >> + ring = &adev->gfx.compute_ring[ring_id]; >> + >> + /* mec0 is me1 */ >> + ring->me = ((i / adev->gfx.mec.num_queue_per_pipe) >> + / adev->gfx.mec.num_pipe_per_mec) >> + + 1; >> + ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe) >> + % adev->gfx.mec.num_pipe_per_mec; >> + ring->queue = i % adev->gfx.mec.num_queue_per_pipe; >> + >> ring->ring_obj = NULL; >> ring->use_doorbell = true; >> - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; >> - ring->me = 1; /* first MEC */ >> - ring->pipe = i / 8; >> - ring->queue = i % 8; >> - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE); >> + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE); >> + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; >> sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); >> - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; >> + >> + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP >> + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) >> + + ring->pipe; >> + >> /* type-2 packets are deprecated on MEC, use type-3 instead */ >> r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, >> irq_type); >> if (r) >> return r; >> + >> + ring_id++; >> } >> >> r = gfx_v8_0_kiq_init(adev); >> if (r) { >> DRM_ERROR("Failed to init KIQ BOs!\n"); >> return r; >> } >> >> kiq = &adev->gfx.kiq; >> r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); >> @@ -5686,21 +5733,21 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { >> .select_se_sh = &gfx_v8_0_select_se_sh, >> .read_wave_data = &gfx_v8_0_read_wave_data, >> .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, >> }; >> >> static int gfx_v8_0_early_init(void *handle) >> { >> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >> >> adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; >> - adev->gfx.num_compute_rings = 
GFX8_NUM_COMPUTE_RINGS; >> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; >> adev->gfx.funcs = &gfx_v8_0_gfx_funcs; >> gfx_v8_0_set_ring_funcs(adev); >> gfx_v8_0_set_irq_funcs(adev); >> gfx_v8_0_set_gds_init(adev); >> gfx_v8_0_set_rlc_funcs(adev); >> >> return 0; >> } >> >> static int gfx_v8_0_late_init(void *handle) >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> index 1a7b743..de6e537 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> @@ -31,21 +31,20 @@ >> #include "vega10/GC/gc_9_0_offset.h" >> #include "vega10/GC/gc_9_0_sh_mask.h" >> #include "vega10/vega10_enum.h" >> #include "vega10/HDP/hdp_4_0_offset.h" >> >> #include "soc15_common.h" >> #include "clearstate_gfx9.h" >> #include "v9_structs.h" >> >> #define GFX9_NUM_GFX_RINGS 1 >> -#define GFX9_NUM_COMPUTE_RINGS 8 >> #define GFX9_NUM_SE 4 >> #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 >> >> MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); >> MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); >> MODULE_FIRMWARE("amdgpu/vega10_me.bin"); >> MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); >> MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); >> MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); >> >> @@ -469,45 +468,79 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) >> amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); >> amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); >> >> amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj); >> adev->gfx.mec.mec_fw_obj = NULL; >> } >> } >> >> #define MEC_HPD_SIZE 2048 >> >> +static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev) >> +{ >> + int i, queue, pipe, mec; >> + >> + /* policy for amdgpu compute queue ownership */ >> + for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { >> + queue = i % adev->gfx.mec.num_queue_per_pipe; >> + pipe = (i / adev->gfx.mec.num_queue_per_pipe) >> + % adev->gfx.mec.num_pipe_per_mec; >> + mec = (i / adev->gfx.mec.num_queue_per_pipe) >> + / 
adev->gfx.mec.num_pipe_per_mec; >> + >> + /* we've run out of HW */ >> + if (mec > adev->gfx.mec.num_mec) >> + break; >> + >> + /* policy: amdgpu owns all queues in the first pipe */ >> + if (mec == 0 && pipe == 0) >> + set_bit(i, adev->gfx.mec.queue_bitmap); >> + } >> + >> + /* update the number of active compute rings */ >> + adev->gfx.num_compute_rings = >> + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES); >> + >> + /* If you hit this case and edited the policy, you probably just >> + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ >> + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) >> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; >> +} >> + >> static int gfx_v9_0_mec_init(struct amdgpu_device *adev) >> { >> int r; >> u32 *hpd; >> const __le32 *fw_data; >> unsigned fw_size; >> u32 *fw; >> size_t mec_hpd_size; >> >> const struct gfx_firmware_header_v1_0 *mec_hdr; >> >> + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES); >> + >> switch (adev->asic_type) { >> case CHIP_VEGA10: >> adev->gfx.mec.num_mec = 2; >> break; >> default: >> adev->gfx.mec.num_mec = 1; >> break; >> } >> >> adev->gfx.mec.num_pipe_per_mec = 4; >> adev->gfx.mec.num_queue_per_pipe = 8; >> >> - /* only 1 pipe of the first MEC is owned by amdgpu */ >> - mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * MEC_HPD_SIZE; >> + /* take ownership of the relevant compute queues */ >> + gfx_v9_0_compute_queue_acquire(adev); >> + mec_hpd_size = adev->gfx.num_compute_rings * MEC_HPD_SIZE; >> >> if (adev->gfx.mec.hpd_eop_obj == NULL) { >> r = amdgpu_bo_create(adev, >> mec_hpd_size, >> PAGE_SIZE, true, >> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> &adev->gfx.mec.hpd_eop_obj); >> if (r) { >> dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); >> return r; >> @@ -1024,21 +1057,21 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) >> gfx_v9_0_write_data_to_reg(ring, 0, false, >> amdgpu_gds_reg_offset[0].mem_size, 0); >> >> 
amdgpu_ring_commit(ring); >> >> return 0; >> } >> >> static int gfx_v9_0_sw_init(void *handle) >> { >> - int i, r; >> + int i, r, ring_id; >> struct amdgpu_ring *ring; >> struct amdgpu_kiq *kiq; >> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >> >> /* KIQ event */ >> r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); >> if (r) >> return r; >> >> /* EOP Event */ >> @@ -1081,21 +1114,60 @@ static int gfx_v9_0_sw_init(void *handle) >> sprintf(ring->name, "gfx"); >> ring->use_doorbell = true; >> ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; >> r = amdgpu_ring_init(adev, ring, 1024, >> &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); >> if (r) >> return r; >> } >> >> /* set up the compute queues */ >> - for (i = 0; i < adev->gfx.num_compute_rings; i++) { >> + for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) { >> + unsigned irq_type; >> + >> + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) >> + continue; >> + >> + if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS)) >> + break; >> + >> + ring = &adev->gfx.compute_ring[ring_id]; >> + >> + /* mec0 is me1 */ >> + ring->me = ((i / adev->gfx.mec.num_queue_per_pipe) >> + / adev->gfx.mec.num_pipe_per_mec) >> + + 1; >> + ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe) >> + % adev->gfx.mec.num_pipe_per_mec; >> + ring->queue = i % adev->gfx.mec.num_queue_per_pipe; >> + >> + ring->ring_obj = NULL; >> + ring->use_doorbell = true; >> + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * MEC_HPD_SIZE); >> + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; >> + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); >> + >> + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP >> + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) >> + + ring->pipe; >> + >> + /* type-2 packets are deprecated on MEC, use type-3 instead */ >> + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, >> + irq_type); >> + if (r) >> + return r; >> + >> + 
ring_id++; >> + } >> + >> + /* set up the compute queues */ >> + for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) { >> unsigned irq_type; >> >> /* max 32 queues per MEC */ >> if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { >> DRM_ERROR("Too many (%d) compute rings!\n", i); >> break; >> } >> ring = &adev->gfx.compute_ring[i]; >> ring->ring_obj = NULL; >> ring->use_doorbell = true; >> @@ -2655,21 +2727,21 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, >> gfx_v9_0_write_data_to_reg(ring, 0, false, >> amdgpu_gds_reg_offset[vmid].oa, >> (1 << (oa_size + oa_base)) - (1 << oa_base)); >> } >> >> static int gfx_v9_0_early_init(void *handle) >> { >> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >> >> adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; >> - adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS; >> + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; >> gfx_v9_0_set_ring_funcs(adev); >> gfx_v9_0_set_irq_funcs(adev); >> gfx_v9_0_set_gds_init(adev); >> gfx_v9_0_set_rlc_funcs(adev); >> >> return 0; >> } >> >> static int gfx_v9_0_late_init(void *handle) >> { >> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h >> index a09d9f3..67f6d19 100644 >> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h >> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h >> @@ -26,20 +26,21 @@ >> */ >> >> #ifndef KGD_KFD_INTERFACE_H_INCLUDED >> #define KGD_KFD_INTERFACE_H_INCLUDED >> >> #include <linux/types.h> >> >> struct pci_dev; >> >> #define KFD_INTERFACE_VERSION 1 >> +#define KGD_MAX_QUEUES 128 >> >> struct kfd_dev; >> struct kgd_dev; >> >> struct kgd_mem; >> >> enum kgd_memory_pool { >> KGD_POOL_SYSTEM_CACHEABLE = 1, >> KGD_POOL_SYSTEM_WRITECOMBINE = 2, >> KGD_POOL_FRAMEBUFFER = 3, >> -- >> 2.9.3 >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx