Previously the queue/pipe split with kfd operated at pipe granularity.
This patch allows amdgpu to take ownership of an arbitrary set of
queues. It also consolidates the last few magic numbers in the compute
initialization process into mec_init.

v2: support for gfx9
v3: renamed AMDGPU_MAX_QUEUES to AMDGPU_MAX_COMPUTE_QUEUES
v4: fix off-by-one in num_mec checks in *_compute_queue_acquire

Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
Acked-by: Christian König <christian.koenig at amd.com>
Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h             |  7 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c           | 82 +++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c           | 81 +++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c           | 84 +++++++++++++++++++++++--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |  1 +
 5 files changed, 211 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d6a61e6..56fc327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -29,40 +29,42 @@
 #define __AMDGPU_H__

 #include <linux/atomic.h>
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/rbtree.h>
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>

 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
 #include <ttm/ttm_execbuf_util.h>

 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>

+#include <kgd_kfd_interface.h>
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_irq.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ttm.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_gds.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_vm.h"
 #include "amd_powerplay.h"
 #include "amdgpu_dpm.h"
 #include "amdgpu_acp.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"

 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
@@ -875,49 +877,54 @@ struct amdgpu_rlc {
 	/* safe mode for updating CG/PG state */
 	bool in_safe_mode;
 	const struct amdgpu_rlc_funcs *funcs;

 	/* for firmware data */
 	u32 save_and_restore_offset;
 	u32 clear_state_descriptor_offset;
 	u32 avail_scratch_ram_locations;
 	u32 reg_restore_list_size;
 	u32 reg_list_format_start;
 	u32 reg_list_format_separate_start;
 	u32 starting_offsets_start;
 	u32 reg_list_format_size_bytes;
 	u32 reg_list_size_bytes;

 	u32 *register_list_format;
 	u32 *register_restore;
 };

+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
 	struct amdgpu_bo	*mec_fw_obj;
 	u64			mec_fw_gpu_addr;
 	u32 num_mec;
 	u32 num_pipe_per_mec;
 	u32 num_queue_per_pipe;
 	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+	/* These are the resources for which amdgpu takes ownership */
+	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };

 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };

 /*
  * GPU scratch registers structures, functions & helpers
  */
 struct amdgpu_scratch {
 	unsigned		num_reg;
 	uint32_t		reg_base;
 	uint32_t		free_mask;
 };

 /*
  * GFX configurations
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index daca245..4a680fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -32,41 +32,40 @@
 #include "amdgpu_ucode.h"
"amdgpu_ucode.h" #include "clearstate_ci.h" #include "dce/dce_8_0_d.h" #include "dce/dce_8_0_sh_mask.h" #include "bif/bif_4_1_d.h" #include "bif/bif_4_1_sh_mask.h" #include "gca/gfx_7_0_d.h" #include "gca/gfx_7_2_enum.h" #include "gca/gfx_7_2_sh_mask.h" #include "gmc/gmc_7_0_d.h" #include "gmc/gmc_7_0_sh_mask.h" #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" #define GFX7_NUM_GFX_RINGS 1 -#define GFX7_NUM_COMPUTE_RINGS 8 #define GFX7_MEC_HPD_SIZE 2048 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); MODULE_FIRMWARE("radeon/bonaire_me.bin"); MODULE_FIRMWARE("radeon/bonaire_ce.bin"); MODULE_FIRMWARE("radeon/bonaire_rlc.bin"); MODULE_FIRMWARE("radeon/bonaire_mec.bin"); MODULE_FIRMWARE("radeon/hawaii_pfp.bin"); MODULE_FIRMWARE("radeon/hawaii_me.bin"); MODULE_FIRMWARE("radeon/hawaii_ce.bin"); MODULE_FIRMWARE("radeon/hawaii_rlc.bin"); MODULE_FIRMWARE("radeon/hawaii_mec.bin"); MODULE_FIRMWARE("radeon/kaveri_pfp.bin"); MODULE_FIRMWARE("radeon/kaveri_me.bin"); @@ -2806,67 +2805,98 @@ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev) } } } static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) { int r; if (adev->gfx.mec.hpd_eop_obj) { r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); adev->gfx.mec.hpd_eop_obj = NULL; } } +static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev) +{ + int i, queue, pipe, mec; + + /* policy for amdgpu compute queue ownership */ + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + queue = i % adev->gfx.mec.num_queue_per_pipe; + pipe = (i / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + mec = (i / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + + /* we've run out of HW */ + if (mec >= adev->gfx.mec.num_mec) + break; + + /* policy: amdgpu owns all queues in the first pipe */ + if (mec == 0 && pipe == 0) + set_bit(i, adev->gfx.mec.queue_bitmap); + } + + /* update the number of active compute rings */ + adev->gfx.num_compute_rings = + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* If you hit this case and edited the policy, you probably just + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; +} + static int gfx_v7_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; size_t mec_hpd_size; - /* - * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total - * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total - * Nonetheless, we assign only 1 pipe because all other pipes will - * be handled by KFD - */ + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + switch (adev->asic_type) { case CHIP_KAVERI: adev->gfx.mec.num_mec = 2; break; case CHIP_BONAIRE: case CHIP_HAWAII: case CHIP_KABINI: case CHIP_MULLINS: default: adev->gfx.mec.num_mec = 1; break; } adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; + /* take ownership of the relevant compute queues */ + gfx_v7_0_compute_queue_acquire(adev); + + /* allocate space for ALL pipes (even the ones we don't own) */ mec_hpd_size = 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}

 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
@@ -4513,41 +4543,41 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }

 static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v7_0_select_se_sh,
 	.read_wave_data = &gfx_v7_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
 };

 static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
 	.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
 	.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
 };

 static int gfx_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
 	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
 	gfx_v7_0_set_irq_funcs(adev);
 	gfx_v7_0_set_gds_init(adev);

 	return 0;
 }

 static int gfx_v7_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;

 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
 	if (r)
 		return r;

 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
 	if (r)
@@ -4709,41 +4739,41 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
 	default:
 		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	case 2:
 		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	case 4:
 		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	}
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }

 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r;
+	int i, r, ring_id;

 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181,
 			      &adev->gfx.eop_irq);
 	if (r)
 		return r;

 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;

 	/* Privileged inst */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
 			      &adev->gfx.priv_inst_irq);
 	if (r)
 		return r;

 	gfx_v7_0_scratch_init(adev);
@@ -4760,62 +4790,72 @@ static int gfx_v7_0_sw_init(void *handle)
 	}

 	/* allocate mec buffers */
 	r = gfx_v7_0_mec_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init MEC BOs!\n");
 		return r;
 	}

 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}

 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
 		unsigned irq_type;

-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+			   / adev->gfx.mec.num_pipe_per_mec)
+			   + 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			   % adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
+		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, irq_type);
 		if (r)
 			return r;
+
+		ring_id++;
 	}

 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
 				    &adev->gds.gds_gfx_bo, NULL, NULL);
 	if (r)
 		return r;

 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
 				    &adev->gds.gws_gfx_bo, NULL, NULL);
 	if (r)
 		return r;

 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
 				    &adev->gds.oa_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
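Aside for reviewers (not part of the patch): the index arithmetic shared by the
*_compute_queue_acquire() functions is easy to sanity-check in isolation. A
minimal userspace sketch, assuming the KAVERI topology (2 MECs, 4 pipes/MEC,
8 queues/pipe); all names here are local stand-ins, not driver symbols:

    /* Sketch of the linear queue index -> (mec, pipe, queue) decomposition
     * and of the "amdgpu owns MEC0 pipe 0" policy from the patch.
     */
    #include <stdio.h>

    int main(void)
    {
            const int num_mec = 2, num_pipe_per_mec = 4, num_queue_per_pipe = 8;
            int i;

            for (i = 0; i < num_mec * num_pipe_per_mec * num_queue_per_pipe; ++i) {
                    int queue = i % num_queue_per_pipe;
                    int pipe = (i / num_queue_per_pipe) % num_pipe_per_mec;
                    int mec = (i / num_queue_per_pipe) / num_pipe_per_mec;

                    /* same policy as the patch: first pipe of the first MEC */
                    if (mec == 0 && pipe == 0)
                            printf("bit %2d -> mec %d pipe %d queue %d (amdgpu)\n",
                                   i, mec, pipe, queue);
            }
            return 0;
    }

With these constants, exactly queues 0-7 land in the bitmap, so every other
pipe remains available for KFD.

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 49332fd..3a49738 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -35,41 +35,40 @@
 #include "gmc/gmc_8_2_d.h"
 #include "gmc/gmc_8_2_sh_mask.h"

 #include "oss/oss_3_0_d.h"
 #include "oss/oss_3_0_sh_mask.h"

 #include "bif/bif_5_0_d.h"
 #include "bif/bif_5_0_sh_mask.h"

 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"

 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"

 #include "smu/smu_7_1_3_d.h"

 #define GFX8_NUM_GFX_RINGS 1
-#define GFX8_NUM_COMPUTE_RINGS 8
 #define GFX8_MEC_HPD_SIZE 2048

 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
@@ -1398,67 +1397,102 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
 	}
 	ring->queue = 0;
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
 	r = amdgpu_ring_init(adev, ring, 1024, irq,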
 			     AMDGPU_CP_KIQ_IRQ_DRIVER0);
 	if (r)
 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

 	return r;
 }

 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq)
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }

+static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* we've run out of HW */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		/* policy: amdgpu owns all queues in the first pipe */
+		if (mec == 0 && pipe == 0)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap,
+			      AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	size_t mec_hpd_size;

+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
 	case CHIP_TONGA:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_POLARIS10:
 	case CHIP_CARRIZO:
 		adev->gfx.mec.num_mec = 2;
 		break;
 	case CHIP_TOPAZ:
 	case CHIP_STONEY:
 	default:
 		adev->gfx.mec.num_mec = 1;
 		break;
 	}

 	adev->gfx.mec.num_pipe_per_mec = 4;
 	adev->gfx.mec.num_queue_per_pipe = 8;

-	/* only 1 pipe of the first MEC is owned by amdgpu */
-	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE;
+	/* take ownership of the relevant compute queues */
+	gfx_v8_0_compute_queue_acquire(adev);
+
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}

 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
@@ -2071,41 +2105,41 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 	/* fix up row size */
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
 	default:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
 		break;
 	case 2:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
 		break;
 	case 4:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
 		break;
 	}
 	adev->gfx.config.gb_addr_config = gb_addr_config;

 	return 0;
 }

 static int gfx_v8_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
 	if (r)
 		return r;

 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
 	if (r)
 		return r;

 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;
@@ -2138,63 +2172,76 @@ static int gfx_v8_0_sw_init(void *handle)
 	}

 	/* set up the gfx ring */
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		/* no gfx doorbells on iceland */
 		if (adev->asic_type != CHIP_TOPAZ) {
 			ring->use_doorbell = true;
 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
 		}

 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}

 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
 		unsigned irq_type;

-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
 			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+			   / adev->gfx.mec.num_pipe_per_mec)
+			   + 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			   % adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE);
+		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
+		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     irq_type);
 		if (r)
 			return r;
+
+		ring_id++;
 	}

 	r = gfx_v8_0_kiq_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init KIQ BOs!\n");
 		return r;
 	}

 	kiq = &adev->gfx.kiq;
 	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 	if (r)
 		return r;

 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
 	r = gfx_v8_0_compute_mqd_sw_init(adev);
 	if (r)
 		return r;

 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -5643,41 +5690,41 @@ static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 				     uint32_t size, uint32_t *dst)
 {
 	wave_read_regs(
 		adev, simd, wave, 0,
 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }

 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v8_0_select_se_sh,
 	.read_wave_data = &gfx_v8_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
 };

 static int gfx_v8_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
 	gfx_v8_0_set_gds_init(adev);
 	gfx_v8_0_set_rlc_funcs(adev);

 	return 0;
 }

 static int gfx_v8_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;

 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
 	if (r)
 		return r;

 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
 	if (r)
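Aside (illustrative, not part of the patch): with the per-queue split,
num_compute_rings is no longer a constant but the popcount of the ownership
bitmap, and owned queues are packed onto contiguous ring/doorbell indices via
ring_id. A userspace sketch of that packing, using __builtin_popcountll in
place of the kernel's bitmap_weight() and a hypothetical 64-bit bitmap:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t queue_bitmap = 0xffULL;  /* MEC0 pipe 0: queues 0-7 */
            int num_compute_rings = __builtin_popcountll(queue_bitmap);
            int i, ring_id = 0;

            /* the ring_id'th set bit backs compute_ring[ring_id] */
            for (i = 0; i < 64; i++) {
                    if (!(queue_bitmap & (1ULL << i)))
                            continue;
                    printf("queue %2d -> compute_ring[%d], doorbell MEC_RING0+%d\n",
                           i, ring_id, ring_id);
                    ring_id++;
            }
            printf("num_compute_rings = %d\n", num_compute_rings);
            return 0;
    }

This is also why the HPD EOP buffer on gfx8/gfx9 can shrink to
num_compute_rings * MEC_HPD_SIZE: only owned queues get a ring (and an EOP
slot), while KFD manages the rest itself.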
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ec2ae6f..45d31c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -21,41 +21,40 @@
  *
  */
 #include <linux/firmware.h>
 #include "drmP.h"
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
 #include "soc15d.h"

 #include "vega10/soc15ip.h"
 #include "vega10/GC/gc_9_0_offset.h"
 #include "vega10/GC/gc_9_0_sh_mask.h"
 #include "vega10/vega10_enum.h"
 #include "vega10/HDP/hdp_4_0_offset.h"

 #include "soc15_common.h"
 #include "clearstate_gfx9.h"
 #include "v9_structs.h"

 #define GFX9_NUM_GFX_RINGS     1
-#define GFX9_NUM_COMPUTE_RINGS 8
 #define GFX9_NUM_SE		4
 #define GFX9_MEC_HPD_SIZE 2048
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000

 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
 	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
 	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
 	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
@@ -458,65 +457,99 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 	if (adev->gfx.mec.mec_fw_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

 		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
 		adev->gfx.mec.mec_fw_obj = NULL;
 	}
 }

+static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* we've run out of HW */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		/* policy: amdgpu owns all queues in the first pipe */
+		if (mec == 0 && pipe == 0)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap,
+			      AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	const __le32 *fw_data;
 	unsigned fw_size;
 	u32 *fw;
 	size_t mec_hpd_size;

 	const struct gfx_firmware_header_v1_0 *mec_hdr;

+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		adev->gfx.mec.num_mec = 2;
 		break;
 	default:
 		adev->gfx.mec.num_mec = 1;
 		break;
 	}

 	adev->gfx.mec.num_pipe_per_mec = 4;
 	adev->gfx.mec.num_queue_per_pipe = 8;

-	/* only 1 pipe of the first MEC is owned by amdgpu */
-	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX9_MEC_HPD_SIZE;
+	/* take ownership of the relevant compute queues */
+	gfx_v9_0_compute_queue_acquire(adev);
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}

 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v9_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
@@ -1011,41 +1044,41 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
 				 PACKET3_DMA_DATA_SRC_SEL(2)));
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);

 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[0].mem_size, 0);

 	amdgpu_ring_commit(ring);

 	return 0;
 }

 static int gfx_v9_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
 	if (r)
 		return r;

 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
 	if (r)
 		return r;

 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;
@@ -1068,41 +1101,80 @@ static int gfx_v9_0_sw_init(void *handle)
 	r = gfx_v9_0_mec_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init MEC BOs!\n");
 		return r;
 	}

 	/* set up the gfx ring */
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		ring->use_doorbell = true;
 		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}

 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
+		unsigned irq_type;
+
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
+			break;
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+			   / adev->gfx.mec.num_pipe_per_mec)
+			   + 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			   % adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
+		ring->ring_obj = NULL;
+		ring->use_doorbell = true;
+		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE);
+		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + ring_id) << 1;
+		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
+		/* type-2 packets are deprecated on MEC, use type-3 instead */
+		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+				     irq_type);
+		if (r)
+			return r;
+
+		ring_id++;
+	}
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX9_MEC_HPD_SIZE);
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-				     irq_type);
@@ -2451,41 +2523,41 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[vmid].mem_size,
 				   gds_size);

 	/* GWS */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[vmid].gws,
 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

 	/* OA */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[vmid].oa,
 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
 }

 static int gfx_v9_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
 	gfx_v9_0_set_gds_init(adev);
 	gfx_v9_0_set_rlc_funcs(adev);

 	return 0;
 }

 static int gfx_v9_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;

 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
 	if (r)
 		return r;

 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
 	if (r)
 		return r;
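Aside (illustrative, not part of the patch): the EOP interrupt source is
per-pipe, so each ring's (me, pipe) pair is flattened onto the
AMDGPU_CP_IRQ_COMPUTE_MEC*_PIPE*_EOP enum exactly as the irq_type expression
above does. A standalone sketch of that arithmetic; the base enum value is a
stand-in, only the offsets matter:

    #include <stdio.h>

    enum {
            CP_IRQ_COMPUTE_MEC1_PIPE0_EOP = 0,  /* stand-in base value */
    };

    int main(void)
    {
            const int num_pipe_per_mec = 4;
            int me, pipe;

            for (me = 1; me <= 2; me++)         /* mec0 is me1 */
                    for (pipe = 0; pipe < num_pipe_per_mec; pipe++)
                            printf("me %d pipe %d -> irq_type %d\n", me, pipe,
                                   CP_IRQ_COMPUTE_MEC1_PIPE0_EOP +
                                   (me - 1) * num_pipe_per_mec + pipe);
            return 0;
    }

The old code hard-coded me = 1 and irq_type = base + pipe, which only covered
the first MEC; the generalized form keeps working once queues outside MEC0 can
be owned.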
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index a09d9f3..67f6d19 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -16,40 +16,41 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */

 /*
  * This file defines the private interface between the
  * AMD kernel graphics drivers and the AMD KFD.
  */

 #ifndef KGD_KFD_INTERFACE_H_INCLUDED
 #define KGD_KFD_INTERFACE_H_INCLUDED

 #include <linux/types.h>

 struct pci_dev;

 #define KFD_INTERFACE_VERSION 1
+#define KGD_MAX_QUEUES 128

 struct kfd_dev;
 struct kgd_dev;

 struct kgd_mem;

 enum kgd_memory_pool {
 	KGD_POOL_SYSTEM_CACHEABLE = 1,
 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
 	KGD_POOL_FRAMEBUFFER = 3,
 };

 enum kgd_engine_type {
 	KGD_ENGINE_PFP = 1,
 	KGD_ENGINE_ME,
 	KGD_ENGINE_CE,
 	KGD_ENGINE_MEC1,
 	KGD_ENGINE_MEC2,
 	KGD_ENGINE_RLC,
 	KGD_ENGINE_SDMA1,
-- 
2.9.3
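Postscript (illustrative, not part of the patch): KGD_MAX_QUEUES now bounds
the queue bitmap shared across the amdgpu/KFD split, so it must cover the
largest topology the acquire functions enumerate. A hypothetical compile-time
guard one could add alongside the define; the MAX_* constants below are
stand-ins mirroring the largest values set in the *_mec_init() functions:

    #define KGD_MAX_QUEUES 128
    #define MAX_MEC 2
    #define MAX_PIPE_PER_MEC 4
    #define MAX_QUEUE_PER_PIPE 8

    /* 2 MECs * 4 pipes * 8 queues = 64 <= 128, so the bitmap fits */
    _Static_assert(MAX_MEC * MAX_PIPE_PER_MEC * MAX_QUEUE_PER_PIPE <=
                   KGD_MAX_QUEUES,
                   "KGD_MAX_QUEUES too small for the queue enumeration");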