Comments inline. On 17-04-06 02:21 AM, Andres Rodriguez wrote: > Previously the queue/pipe split with kfd operated with pipe > granularity. This patch allows amdgpu to take ownership of an arbitrary > set of queues. > > It also consolidates the last few magic numbers in the compute > initialization process into mec_init. > > v2: support for gfx9 > v3: renamed AMDGPU_MAX_QUEUES to AMDGPU_MAX_COMPUTE_QUEUES > > Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net> > Acked-by: Christian König <christian.koenig at amd.com> > Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++ > drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 83 +++++++++++++++++------- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 81 +++++++++++++++++++----- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 84 +++++++++++++++++++++++-- > drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 1 + > 5 files changed, 212 insertions(+), 44 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 8187587..b765aca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h [snip] > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > index 3340012..55e61a9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > @@ -42,21 +42,20 @@ > #include "gca/gfx_7_2_enum.h" > #include "gca/gfx_7_2_sh_mask.h" > > #include "gmc/gmc_7_0_d.h" > #include "gmc/gmc_7_0_sh_mask.h" > > #include "oss/oss_2_0_d.h" > #include "oss/oss_2_0_sh_mask.h" > > #define GFX7_NUM_GFX_RINGS 1 > -#define GFX7_NUM_COMPUTE_RINGS 8 > #define GFX7_MEC_HPD_SIZE 2048 > > > static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); > static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); > static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); > > MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); > MODULE_FIRMWARE("radeon/bonaire_me.bin"); > MODULE_FIRMWARE("radeon/bonaire_ce.bin"); > @@ -2817,47 +2816,79 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) > if (unlikely(r != 0)) > dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); > amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); > amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); > > amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); > adev->gfx.mec.hpd_eop_obj = NULL; > } > } > > +static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev) > +{ > + int i, queue, pipe, mec; > + > + /* policy for amdgpu compute queue ownership */ > + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { > + queue = i % adev->gfx.mec.num_queue_per_pipe; > + pipe = (i / adev->gfx.mec.num_queue_per_pipe) > + % adev->gfx.mec.num_pipe_per_mec; > + mec = (i / adev->gfx.mec.num_queue_per_pipe) > + / adev->gfx.mec.num_pipe_per_mec; > + > + /* we've run out of HW */ > + if (mec > adev->gfx.mec.num_mec) [FK] Shouldn't this be >=? > + break; > + > + /* policy: amdgpu owns all queues in the first pipe */ > + if (mec == 0 && pipe == 0) > + set_bit(i, adev->gfx.mec.queue_bitmap); > + } > + > + /* update the number of active compute rings */ > + adev->gfx.num_compute_rings = > + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); > + > + /* If you hit this case and edited the policy, you probably just > + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ > + WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS); > + if (adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS) [FK] You can write this shorter: if (WARN_ON(condition)) statement; > + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; > +} > + [snip] > @@ -4769,42 +4800,52 @@ static int gfx_v7_0_sw_init(void *handle) > ring = &adev->gfx.gfx_ring[i]; > ring->ring_obj = NULL; > sprintf(ring->name, "gfx"); > r = amdgpu_ring_init(adev, ring, 1024, > &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); > if (r) > return r; > } > > /* set up the compute queues */ > - for (i = 0; i < adev->gfx.num_compute_rings; i++) { > + for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) { > unsigned irq_type; > > - /* max 32 queues per MEC */ > - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { > - DRM_ERROR("Too many (%d) compute rings!\n", i); > - break; > - } > - ring = &adev->gfx.compute_ring[i]; > + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) > + continue; > + > + ring = &adev->gfx.compute_ring[ring_id]; > + > + /* mec0 is me1 */ > + ring->me = ((i / adev->gfx.mec.num_queue_per_pipe) > + / adev->gfx.mec.num_pipe_per_mec) > + + 1; > + ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe) > + % adev->gfx.mec.num_pipe_per_mec; > + ring->queue = i % adev->gfx.mec.num_queue_per_pipe; > + > ring->ring_obj = NULL; > ring->use_doorbell = true; > - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; > - ring->me = 1; /* first MEC */ > - ring->pipe = i / 8; > - ring->queue = i % 8; > + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; > sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); > - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; > + > + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP > + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) > + + ring->pipe; > + > /* type-2 packets are deprecated on MEC, use type-3 instead */ > r = amdgpu_ring_init(adev, ring, 1024, > &adev->gfx.eop_irq, irq_type); > if (r) > return r; > + > + ring_id++; [FK] Is there any case where ring_id != i? I'm just wondering what's the purpose of a separate variable for this. I'm guessing it's for a later change in the queue allocation policy ... > } > > /* reserve GDS, GWS and OA resource for gfx */ > r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, > PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, > &adev->gds.gds_gfx_bo, NULL, NULL); > if (r) > return r; > > r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index b34e597..c010b5d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -45,21 +45,20 @@ > #include "gca/gfx_8_0_enum.h" > #include "gca/gfx_8_0_sh_mask.h" > #include "gca/gfx_8_0_enum.h" > > #include "dce/dce_10_0_d.h" > #include "dce/dce_10_0_sh_mask.h" > > #include "smu/smu_7_1_3_d.h" > > #define GFX8_NUM_GFX_RINGS 1 > -#define GFX8_NUM_COMPUTE_RINGS 8 > #define GFX8_MEC_HPD_SIZE 2048 > > > #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 > #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 > #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 > #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 > > #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) > #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) > @@ -1410,47 +1409,82 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, > } > static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, > struct amdgpu_irq_src *irq) > { > amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); > amdgpu_ring_fini(ring); > } > > #define GFX8_MEC_HPD_SIZE 2048 > > +static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev) > +{ > + int i, queue, pipe, mec; > + > + /* policy for amdgpu compute queue ownership */ > + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { > + queue = i % adev->gfx.mec.num_queue_per_pipe; > + pipe = (i / adev->gfx.mec.num_queue_per_pipe) > + % adev->gfx.mec.num_pipe_per_mec; > + mec = (i / adev->gfx.mec.num_queue_per_pipe) > + / adev->gfx.mec.num_pipe_per_mec; > + > + /* we've run out of HW */ > + if (mec > adev->gfx.mec.num_mec) [FK] >= > + break; > + > + /* policy: amdgpu owns all queues in the first pipe */ > + if (mec == 0 && pipe == 0) > + set_bit(i, adev->gfx.mec.queue_bitmap); > + } [snip] > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 5dcc1b0..72eeda3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -31,21 +31,20 @@ > #include "vega10/GC/gc_9_0_offset.h" > #include "vega10/GC/gc_9_0_sh_mask.h" > #include "vega10/vega10_enum.h" > #include "vega10/HDP/hdp_4_0_offset.h" > > #include "soc15_common.h" > #include "clearstate_gfx9.h" > #include "v9_structs.h" > > #define GFX9_NUM_GFX_RINGS 1 > -#define GFX9_NUM_COMPUTE_RINGS 8 > #define GFX9_NUM_SE 4 > #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 > > MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); > MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); > MODULE_FIRMWARE("amdgpu/vega10_me.bin"); > MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); > MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); > MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); > > @@ -469,45 +468,79 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) > amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); > amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); > > amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj); > adev->gfx.mec.mec_fw_obj = NULL; > } > } > > #define MEC_HPD_SIZE 2048 > > +static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev) > +{ > + int i, queue, pipe, mec; > + > + /* policy for amdgpu compute queue ownership */ > + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { > + queue = i % adev->gfx.mec.num_queue_per_pipe; > + pipe = (i / adev->gfx.mec.num_queue_per_pipe) > + % adev->gfx.mec.num_pipe_per_mec; > + mec = (i / adev->gfx.mec.num_queue_per_pipe) > + / adev->gfx.mec.num_pipe_per_mec; > + > + /* we've run out of HW */ > + if (mec > adev->gfx.mec.num_mec) > + break; [FK] >= > + > + /* policy: amdgpu owns all queues in the first pipe */ > + if (mec == 0 && pipe == 0) > + set_bit(i, adev->gfx.mec.queue_bitmap); > + } > + > + /* update the number of active compute rings */ > + adev->gfx.num_compute_rings = > + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); > + > + /* If you hit this case and edited the policy, you probably just > + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ > + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) > + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; > +} > + [snip]