Hi Andres, A little suggestion, if I may. Could you please send future versions of this patch-set with [PATCH vX], where X is the version number ? That would help (at least me) to track the evolution of this patch-set quicker. And another thing, if you haven't done it so far (because I didn't check all the patches), for the patches you change between versions, please add a line inside the patch's commit message detailing the change. e.g: v2: fixed .... v3: changed ... Signed-off-by: ... Thanks, Oded On Thu, Mar 2, 2017 at 11:44 PM, Andres Rodriguez <andresx7 at gmail.com> wrote: > The MQD programming sequence currently exists in 3 different places. > Refactor it to absorb all the duplicates. > > The success path remains mostly identical except for a slightly > different order in the non-kiq case. This shouldn't matter if the HQD > is disabled. > > The error handling paths have been updated to deal with the new code > structure. > > Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++++++++++++++++++---------------- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 417 +++++++++++-------------------- > 2 files changed, 387 insertions(+), 477 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > index 1f93545..8e1e601 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > @@ -42,20 +42,22 @@ > #include "gca/gfx_7_2_sh_mask.h" > > #include "gmc/gmc_7_0_d.h" > #include "gmc/gmc_7_0_sh_mask.h" > > #include "oss/oss_2_0_d.h" > #include "oss/oss_2_0_sh_mask.h" > > #define GFX7_NUM_GFX_RINGS 1 > #define GFX7_NUM_COMPUTE_RINGS 8 > +#define GFX7_MEC_HPD_SIZE 2048 > + > > static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); > static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); > static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); > > MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); > 
MODULE_FIRMWARE("radeon/bonaire_me.bin"); > MODULE_FIRMWARE("radeon/bonaire_ce.bin"); > MODULE_FIRMWARE("radeon/bonaire_rlc.bin"); > MODULE_FIRMWARE("radeon/bonaire_mec.bin"); > @@ -2792,40 +2794,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) > if (unlikely(r != 0)) > dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); > amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); > amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); > > amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); > adev->gfx.mec.hpd_eop_obj = NULL; > } > } > > -#define MEC_HPD_SIZE 2048 > - > static int gfx_v7_0_mec_init(struct amdgpu_device *adev) > { > int r; > u32 *hpd; > > /* > * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total > * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total > * Nonetheless, we assign only 1 pipe because all other pipes will > * be handled by KFD > */ > adev->gfx.mec.num_mec = 1; > adev->gfx.mec.num_pipe = 1; > adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; > > if (adev->gfx.mec.hpd_eop_obj == NULL) { > r = amdgpu_bo_create(adev, > - adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, > + adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2, > PAGE_SIZE, true, > AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, > &adev->gfx.mec.hpd_eop_obj); > if (r) { > dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); > return r; > } > } > > r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); > @@ -2841,21 +2841,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) > return r; > } > r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); > if (r) { > dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); > gfx_v7_0_mec_fini(adev); > return r; > } > > /* clear memory. 
Not sure if this is required or not */ > - memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); > + memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2); > > amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); > amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); > > return 0; > } > > struct hqd_registers > { > u32 cp_mqd_base_addr; > @@ -2916,261 +2916,296 @@ struct bonaire_mqd > u32 restart[3]; > u32 thread_trace_enable; > u32 reserved1; > u32 user_data[16]; > u32 vgtcs_invoke_count[2]; > struct hqd_registers queue_state; > u32 dequeue_cntr; > u32 interrupt_queue[64]; > }; > > -/** > - * gfx_v7_0_cp_compute_resume - setup the compute queue registers > - * > - * @adev: amdgpu_device pointer > - * > - * Program the compute queues and test them to make sure they > - * are working. > - * Returns 0 for success, error for failure. > - */ > -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) > +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe) > { > - int r, i, j; > - u32 tmp; > - bool use_doorbell = true; > - u64 hqd_gpu_addr; > - u64 mqd_gpu_addr; > u64 eop_gpu_addr; > - u64 wb_gpu_addr; > - u32 *buf; > - struct bonaire_mqd *mqd; > - struct amdgpu_ring *ring; > - > - /* fix up chicken bits */ > - tmp = RREG32(mmCP_CPF_DEBUG); > - tmp |= (1 << 23); > - WREG32(mmCP_CPF_DEBUG, tmp); > + u32 tmp; > + size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2; > > - /* init the pipes */ > mutex_lock(&adev->srbm_mutex); > - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { > - int me = (i < 4) ? 1 : 2; > - int pipe = (i < 4) ? 
i : (i - 4); > + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset; > > - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); > + cik_srbm_select(adev, me, pipe, 0, 0); > > - cik_srbm_select(adev, me, pipe, 0, 0); > + /* write the EOP addr */ > + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); > + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); > > - /* write the EOP addr */ > - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); > - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); > + /* set the VMID assigned */ > + WREG32(mmCP_HPD_EOP_VMID, 0); > > - /* set the VMID assigned */ > - WREG32(mmCP_HPD_EOP_VMID, 0); > + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > + tmp = RREG32(mmCP_HPD_EOP_CONTROL); > + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; > + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8); > + WREG32(mmCP_HPD_EOP_CONTROL, tmp); > > - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > - tmp = RREG32(mmCP_HPD_EOP_CONTROL); > - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; > - tmp |= order_base_2(MEC_HPD_SIZE / 8); > - WREG32(mmCP_HPD_EOP_CONTROL, tmp); > - } > cik_srbm_select(adev, 0, 0, 0, 0); > mutex_unlock(&adev->srbm_mutex); > +} > > - /* init the queues. Just two for now. 
*/ > - for (i = 0; i < adev->gfx.num_compute_rings; i++) { > - ring = &adev->gfx.compute_ring[i]; > +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev) > +{ > + int i; > > - if (ring->mqd_obj == NULL) { > - r = amdgpu_bo_create(adev, > - sizeof(struct bonaire_mqd), > - PAGE_SIZE, true, > - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, > - &ring->mqd_obj); > - if (r) { > - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); > - return r; > - } > + /* disable the queue if it's active */ > + if (RREG32(mmCP_HQD_ACTIVE) & 1) { > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); > + for (i = 0; i < adev->usec_timeout; i++) { > + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) > + break; > + udelay(1); > } > > - r = amdgpu_bo_reserve(ring->mqd_obj, false); > - if (unlikely(r != 0)) { > - gfx_v7_0_cp_compute_fini(adev); > - return r; > - } > - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, > - &mqd_gpu_addr); > - if (r) { > - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); > - gfx_v7_0_cp_compute_fini(adev); > - return r; > - } > - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); > - if (r) { > - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); > - gfx_v7_0_cp_compute_fini(adev); > - return r; > - } > + if (i == adev->usec_timeout) > + return -ETIMEDOUT; > > - /* init the mqd struct */ > - memset(buf, 0, sizeof(struct bonaire_mqd)); > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); > + WREG32(mmCP_HQD_PQ_RPTR, 0); > + WREG32(mmCP_HQD_PQ_WPTR, 0); > + } > > - mqd = (struct bonaire_mqd *)buf; > - mqd->header = 0xC0310800; > - mqd->static_thread_mgmt01[0] = 0xffffffff; > - mqd->static_thread_mgmt01[1] = 0xffffffff; > - mqd->static_thread_mgmt23[0] = 0xffffffff; > - mqd->static_thread_mgmt23[1] = 0xffffffff; > + return 0; > +} > > - mutex_lock(&adev->srbm_mutex); > - cik_srbm_select(adev, ring->me, > - ring->pipe, > - ring->queue, 0); > +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, > + struct bonaire_mqd *mqd, > + uint64_t mqd_gpu_addr, > + struct amdgpu_ring *ring) > +{ 
> + u64 hqd_gpu_addr; > + u64 wb_gpu_addr; > > - /* disable wptr polling */ > - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); > - tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK; > - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); > + /* init the mqd struct */ > + memset(mqd, 0, sizeof(struct bonaire_mqd)); > > - /* enable doorbell? */ > - mqd->queue_state.cp_hqd_pq_doorbell_control = > - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > - if (use_doorbell) > - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; > - else > - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; > - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, > - mqd->queue_state.cp_hqd_pq_doorbell_control); > - > - /* disable the queue if it's active */ > - mqd->queue_state.cp_hqd_dequeue_request = 0; > - mqd->queue_state.cp_hqd_pq_rptr = 0; > - mqd->queue_state.cp_hqd_pq_wptr= 0; > - if (RREG32(mmCP_HQD_ACTIVE) & 1) { > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); > - for (j = 0; j < adev->usec_timeout; j++) { > - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) > - break; > - udelay(1); > - } > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); > - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); > - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); > - } > + mqd->header = 0xC0310800; > + mqd->static_thread_mgmt01[0] = 0xffffffff; > + mqd->static_thread_mgmt01[1] = 0xffffffff; > + mqd->static_thread_mgmt23[0] = 0xffffffff; > + mqd->static_thread_mgmt23[1] = 0xffffffff; > > - /* set the pointer to the MQD */ > - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; > - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); > - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); > - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); > - /* set MQD vmid to 0 */ > - mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); > - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; 
> - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); > - > - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > - hqd_gpu_addr = ring->gpu_addr >> 8; > - mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; > - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); > - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); > - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); > - > - /* set up the HQD, this is similar to CP_RB0_CNTL */ > - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); > - mqd->queue_state.cp_hqd_pq_control &= > - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | > - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); > - > - mqd->queue_state.cp_hqd_pq_control |= > - order_base_2(ring->ring_size / 8); > - mqd->queue_state.cp_hqd_pq_control |= > - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); > + /* enable doorbell? */ > + mqd->queue_state.cp_hqd_pq_doorbell_control = > + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > + if (ring->use_doorbell) > + mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; > + else > + mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; > + > + /* set the pointer to the MQD */ > + mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; > + mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); > + > + /* set MQD vmid to 0 */ > + mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); > + mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; > + > + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > + hqd_gpu_addr = ring->gpu_addr >> 8; > + mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; > + mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); > + > + /* set up the HQD, this is similar to CP_RB0_CNTL */ > + mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); > + mqd->queue_state.cp_hqd_pq_control &= > + 
~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | > + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); > + > + mqd->queue_state.cp_hqd_pq_control |= > + order_base_2(ring->ring_size / 8); > + mqd->queue_state.cp_hqd_pq_control |= > + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); > #ifdef __BIG_ENDIAN > - mqd->queue_state.cp_hqd_pq_control |= > - 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; > + mqd->queue_state.cp_hqd_pq_control |= > + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; > #endif > - mqd->queue_state.cp_hqd_pq_control &= > - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | > + mqd->queue_state.cp_hqd_pq_control &= > + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | > CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | > CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); > - mqd->queue_state.cp_hqd_pq_control |= > - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | > - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ > - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); > - > - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ > - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); > - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; > - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; > - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); > - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, > - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); > - > - /* set the wb address wether it's enabled or not */ > - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); > - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; > - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = > - upper_32_bits(wb_gpu_addr) & 0xffff; > - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, > - mqd->queue_state.cp_hqd_pq_rptr_report_addr); > - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, > - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); > - > - /* enable the doorbell if requested */ > - if (use_doorbell) { > - 
mqd->queue_state.cp_hqd_pq_doorbell_control = > - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > - mqd->queue_state.cp_hqd_pq_doorbell_control &= > - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; > - mqd->queue_state.cp_hqd_pq_doorbell_control |= > - (ring->doorbell_index << > - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); > - mqd->queue_state.cp_hqd_pq_doorbell_control |= > - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; > - mqd->queue_state.cp_hqd_pq_doorbell_control &= > - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | > - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); > + mqd->queue_state.cp_hqd_pq_control |= > + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | > + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ > > - } else { > - mqd->queue_state.cp_hqd_pq_doorbell_control = 0; > + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ > + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); > + mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; > + mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; > + > + /* set the wb address wether it's enabled or not */ > + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); > + mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; > + mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = > + upper_32_bits(wb_gpu_addr) & 0xffff; > + > + /* enable the doorbell if requested */ > + if (ring->use_doorbell) { > + mqd->queue_state.cp_hqd_pq_doorbell_control = > + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > + mqd->queue_state.cp_hqd_pq_doorbell_control &= > + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; > + mqd->queue_state.cp_hqd_pq_doorbell_control |= > + (ring->doorbell_index << > + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); > + mqd->queue_state.cp_hqd_pq_doorbell_control |= > + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; > + mqd->queue_state.cp_hqd_pq_doorbell_control &= > + 
~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | > + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); > + > + } else { > + mqd->queue_state.cp_hqd_pq_doorbell_control = 0; > + } > + > + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > + ring->wptr = 0; > + mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; > + mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); > + > + /* set the vmid for the queue */ > + mqd->queue_state.cp_hqd_vmid = 0; > + > + /* activate the queue */ > + mqd->queue_state.cp_hqd_active = 1; > +} > + > +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, > + struct bonaire_mqd *mqd) > +{ > + u32 tmp; > + > + /* disable wptr polling */ > + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); > + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); > + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); > + > + /* program MQD field to HW */ > + WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); > + WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); > + WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); > + WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); > + WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); > + WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); > + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); > + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); > + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr); > + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); > + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control); > + WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); > + WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); > + > + /* activate the HQD */ > + WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); > + > + return 0; > +} > + > +static int gfx_v7_0_compute_queue_init(struct 
amdgpu_device *adev, int ring_id) > +{ > + int r; > + u64 mqd_gpu_addr; > + struct bonaire_mqd *mqd; > + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; > + > + if (ring->mqd_obj == NULL) { > + r = amdgpu_bo_create(adev, > + sizeof(struct bonaire_mqd), > + PAGE_SIZE, true, > + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, > + &ring->mqd_obj); > + if (r) { > + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); > + return r; > } > - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, > - mqd->queue_state.cp_hqd_pq_doorbell_control); > + } > + > + r = amdgpu_bo_reserve(ring->mqd_obj, false); > + if (unlikely(r != 0)) > + goto out; > + > + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, > + &mqd_gpu_addr); > + if (r) { > + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); > + goto out_unreserve; > + } > + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); > + if (r) { > + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); > + goto out_unreserve; > + } > + > + mutex_lock(&adev->srbm_mutex); > + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > > - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > - ring->wptr = 0; > - mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; > - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); > - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); > + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); > + gfx_v7_0_mqd_deactivate(adev); > + gfx_v7_0_mqd_commit(adev, mqd); > > - /* set the vmid for the queue */ > - mqd->queue_state.cp_hqd_vmid = 0; > - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); > + cik_srbm_select(adev, 0, 0, 0, 0); > + mutex_unlock(&adev->srbm_mutex); > > - /* activate the queue */ > - mqd->queue_state.cp_hqd_active = 1; > - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); > + amdgpu_bo_kunmap(ring->mqd_obj); > +out_unreserve: > + amdgpu_bo_unreserve(ring->mqd_obj); > +out: > + return 0; > +} > + > +/** > + * gfx_v7_0_cp_compute_resume - setup the compute queue 
registers > + * > + * @adev: amdgpu_device pointer > + * > + * Program the compute queues and test them to make sure they > + * are working. > + * Returns 0 for success, error for failure. > + */ > +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) > +{ > + int r, i, j; > + u32 tmp; > + struct amdgpu_ring *ring; > > - cik_srbm_select(adev, 0, 0, 0, 0); > - mutex_unlock(&adev->srbm_mutex); > + /* fix up chicken bits */ > + tmp = RREG32(mmCP_CPF_DEBUG); > + tmp |= (1 << 23); > + WREG32(mmCP_CPF_DEBUG, tmp); > > - amdgpu_bo_kunmap(ring->mqd_obj); > - amdgpu_bo_unreserve(ring->mqd_obj); > + /* init the pipes */ > + for (i = 0; i < adev->gfx.mec.num_mec; i++) > + for (j = 0; j < adev->gfx.mec.num_pipe; j++) > + gfx_v7_0_compute_pipe_init(adev, i, j); > > - ring->ready = true; > + /* init the queues */ > + for (i = 0; i < adev->gfx.num_compute_rings; i++) { > + r = gfx_v7_0_compute_queue_init(adev, i); > + if (r) { > + gfx_v7_0_cp_compute_fini(adev); > + return r; > + } > } > > gfx_v7_0_cp_compute_enable(adev, true); > > for (i = 0; i < adev->gfx.num_compute_rings; i++) { > ring = &adev->gfx.compute_ring[i]; > - > + ring->ready = true; > r = amdgpu_ring_test_ring(ring); > if (r) > ring->ready = false; > } > > return 0; > } > > static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable) > { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 67afc90..1c8589a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -46,20 +46,22 @@ > #include "gca/gfx_8_0_sh_mask.h" > #include "gca/gfx_8_0_enum.h" > > #include "dce/dce_10_0_d.h" > #include "dce/dce_10_0_sh_mask.h" > > #include "smu/smu_7_1_3_d.h" > > #define GFX8_NUM_GFX_RINGS 1 > #define GFX8_NUM_COMPUTE_RINGS 8 > +#define GFX8_MEC_HPD_SIZE 2048 > + > > #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 > #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 > #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 
0x22011002 > #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 > > #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) > #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) > #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) > #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) > @@ -1409,38 +1411,38 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, > static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, > struct amdgpu_irq_src *irq) > { > if (amdgpu_sriov_vf(ring->adev)) > amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); > > amdgpu_ring_fini(ring); > irq->data = NULL; > } > > -#define MEC_HPD_SIZE 2048 > +#define GFX8_MEC_HPD_SIZE 2048 > > static int gfx_v8_0_mec_init(struct amdgpu_device *adev) > { > int r; > u32 *hpd; > > /* > * we assign only 1 pipe because all other pipes will > * be handled by KFD > */ > adev->gfx.mec.num_mec = 1; > adev->gfx.mec.num_pipe = 1; > adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; > > if (adev->gfx.mec.hpd_eop_obj == NULL) { > r = amdgpu_bo_create(adev, > - adev->gfx.mec.num_queue * MEC_HPD_SIZE, > + adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE, > PAGE_SIZE, true, > AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, > &adev->gfx.mec.hpd_eop_obj); > if (r) { > dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); > return r; > } > } > > r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); > @@ -1455,21 +1457,21 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) > gfx_v8_0_mec_fini(adev); > return r; > } > r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); > if (r) { > dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); > gfx_v8_0_mec_fini(adev); > return r; > } > > - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); > + memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE); > > amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); > amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); > > 
return 0; > } > > static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) > { > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > @@ -1477,29 +1479,29 @@ static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) > amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); > kiq->eop_obj = NULL; > } > > static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) > { > int r; > u32 *hpd; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > > - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, > + r = amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE, > AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, > &kiq->eop_gpu_addr, (void **)&hpd); > if (r) { > dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); > return r; > } > > - memset(hpd, 0, MEC_HPD_SIZE); > + memset(hpd, 0, GFX8_MEC_HPD_SIZE); > > amdgpu_bo_kunmap(kiq->eop_obj); > > return 0; > } > > static const u32 vgpr_init_compute_shader[] = > { > 0x7e000209, 0x7e020208, > 0x7e040207, 0x7e060206, > @@ -4658,56 +4660,54 @@ static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, > > static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, > struct vi_mqd *mqd, > uint64_t mqd_gpu_addr, > uint64_t eop_gpu_addr, > struct amdgpu_ring *ring) > { > uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; > uint32_t tmp; > > + /* init the mqd struct */ > + memset(mqd, 0, sizeof(struct vi_mqd)); > + > mqd->header = 0xC0310800; > mqd->compute_pipelinestat_enable = 0x00000001; > mqd->compute_static_thread_mgmt_se0 = 0xffffffff; > mqd->compute_static_thread_mgmt_se1 = 0xffffffff; > mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > mqd->compute_static_thread_mgmt_se3 = 0xffffffff; > mqd->compute_misc_reserved = 0x00000003; > > eop_base_addr = eop_gpu_addr >> 8; > mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; > mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); > > /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > tmp = RREG32(mmCP_HQD_EOP_CONTROL); > tmp = REG_SET_FIELD(tmp, 
CP_HQD_EOP_CONTROL, EOP_SIZE, > - (order_base_2(MEC_HPD_SIZE / 4) - 1)); > + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); > > mqd->cp_hqd_eop_control = tmp; > > /* enable doorbell? */ > tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > > if (ring->use_doorbell) > tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > DOORBELL_EN, 1); > else > tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > DOORBELL_EN, 0); > > mqd->cp_hqd_pq_doorbell_control = tmp; > > - /* disable the queue if it's active */ > - mqd->cp_hqd_dequeue_request = 0; > - mqd->cp_hqd_pq_rptr = 0; > - mqd->cp_hqd_pq_wptr = 0; > - > /* set the pointer to the MQD */ > mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; > mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); > > /* set MQD vmid to 0 */ > tmp = RREG32(mmCP_MQD_CONTROL); > tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); > mqd->cp_mqd_control = tmp; > > /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > @@ -4769,53 +4769,87 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, > tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); > tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); > mqd->cp_hqd_persistent_state = tmp; > > /* activate the queue */ > mqd->cp_hqd_active = 1; > > return 0; > } > > -static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, > - struct vi_mqd *mqd, > - struct amdgpu_ring *ring) > +static int gfx_v8_0_mqd_deactivate(struct amdgpu_device *adev) > +{ > + int i; > + > + /* disable the queue if it's active */ > + if (RREG32(mmCP_HQD_ACTIVE) & 1) { > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); > + for (i = 0; i < adev->usec_timeout; i++) { > + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) > + break; > + udelay(1); > + } > + > + if (i == adev->usec_timeout) > + return -ETIMEDOUT; > + > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); > + WREG32(mmCP_HQD_PQ_RPTR, 0); > + WREG32(mmCP_HQD_PQ_WPTR, 0); > + } > + > + return 0; > +} > + > +static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, 
bool enable) > +{ > + uint32_t tmp; > + > + if (!enable) > + return; > + > + if ((adev->asic_type == CHIP_CARRIZO) || > + (adev->asic_type == CHIP_FIJI) || > + (adev->asic_type == CHIP_STONEY) || > + (adev->asic_type == CHIP_POLARIS11) || > + (adev->asic_type == CHIP_POLARIS10)) { > + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); > + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); > + } > + > + tmp = RREG32(mmCP_PQ_STATUS); > + tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); > + WREG32(mmCP_PQ_STATUS, tmp); > +} > + > +static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd) > { > uint32_t tmp; > - int j; > > /* disable wptr polling */ > tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); > tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); > WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); > > WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); > WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); > > /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); > > /* enable doorbell? 
*/ > WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); > > - /* disable the queue if it's active */ > - if (RREG32(mmCP_HQD_ACTIVE) & 1) { > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); > - for (j = 0; j < adev->usec_timeout; j++) { > - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) > - break; > - udelay(1); > - } > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); > - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); > - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > - } > + /* set pq read/write pointers */ > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); > + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); > + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > > /* set the pointer to the MQD */ > WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); > WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); > > /* set MQD vmid to 0 */ > WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); > > /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); > @@ -4828,78 +4862,65 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, > WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, > mqd->cp_hqd_pq_rptr_report_addr_lo); > WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, > mqd->cp_hqd_pq_rptr_report_addr_hi); > > /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ > WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); > WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); > > /* enable the doorbell if requested */ > - if (ring->use_doorbell) { > - if ((adev->asic_type == CHIP_CARRIZO) || > - (adev->asic_type == CHIP_FIJI) || > - (adev->asic_type == CHIP_STONEY)) { > - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, > - AMDGPU_DOORBELL_KIQ << 2); > - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, > - AMDGPU_DOORBELL_MEC_RING7 << 2); > - } > - } > WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); > > /* reset read and write pointers, 
similar to CP_RB0_WPTR/_RPTR */ > WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > > /* set the vmid for the queue */ > WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); > > WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); > > /* activate the queue */ > WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); > > - if (ring->use_doorbell) { > - tmp = RREG32(mmCP_PQ_STATUS); > - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); > - WREG32(mmCP_PQ_STATUS, tmp); > - } > - > return 0; > } > > -static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, > +static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring, > struct vi_mqd *mqd, > u64 mqd_gpu_addr) > { > struct amdgpu_device *adev = ring->adev; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > uint64_t eop_gpu_addr; > bool is_kiq = false; > > if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) > is_kiq = true; > > if (is_kiq) { > eop_gpu_addr = kiq->eop_gpu_addr; > gfx_v8_0_kiq_setting(&kiq->ring); > } else > eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + > - ring->queue * MEC_HPD_SIZE; > + ring->queue * GFX8_MEC_HPD_SIZE; > > mutex_lock(&adev->srbm_mutex); > vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > > gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); > > - if (is_kiq) > - gfx_v8_0_kiq_init_register(adev, mqd, ring); > + if (is_kiq) { > + gfx_v8_0_mqd_deactivate(adev); > + gfx_v8_0_enable_doorbell(adev, ring->use_doorbell); > + gfx_v8_0_mqd_commit(adev, mqd); > + } > > vi_srbm_select(adev, 0, 0, 0, 0); > mutex_unlock(&adev->srbm_mutex); > > if (is_kiq) > gfx_v8_0_kiq_enable(ring); > else > gfx_v8_0_map_queue_enable(&kiq->ring, ring); > > return 0; > @@ -4922,33 +4943,34 @@ static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev) > } > > static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev, > struct amdgpu_ring *ring) > { > struct vi_mqd *mqd; > u64 mqd_gpu_addr; > u32 *buf; > int r = 0; > > - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, > - 
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, > - &mqd_gpu_addr, (void **)&buf); > + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), > + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, > + &ring->mqd_obj, &mqd_gpu_addr, > + (void **)&buf); > if (r) { > dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); > return r; > } > > /* init the mqd struct */ > memset(buf, 0, sizeof(struct vi_mqd)); > mqd = (struct vi_mqd *)buf; > > - r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr); > + r = gfx_v8_0_kiq_queue_init(ring, mqd, mqd_gpu_addr); > if (r) > return r; > > amdgpu_bo_kunmap(ring->mqd_obj); > > return 0; > } > > static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) > { > @@ -4980,260 +5002,113 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) > > ring = &adev->gfx.kiq.ring; > ring->ready = true; > r = amdgpu_ring_test_ring(ring); > if (r) > ring->ready = false; > > return 0; > } > > -static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) > +static int gfx_v8_0_compute_queue_init(struct amdgpu_device *adev, > + int ring_id) > { > - int r, i, j; > - u32 tmp; > - bool use_doorbell = true; > - u64 hqd_gpu_addr; > - u64 mqd_gpu_addr; > + int r; > u64 eop_gpu_addr; > - u64 wb_gpu_addr; > - u32 *buf; > + u64 mqd_gpu_addr; > struct vi_mqd *mqd; > + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; > > - /* init the queues. 
*/ > - for (i = 0; i < adev->gfx.num_compute_rings; i++) { > - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; > - > - if (ring->mqd_obj == NULL) { > - r = amdgpu_bo_create(adev, > - sizeof(struct vi_mqd), > - PAGE_SIZE, true, > - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, > - NULL, &ring->mqd_obj); > - if (r) { > - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); > - return r; > - } > - } > - > - r = amdgpu_bo_reserve(ring->mqd_obj, false); > - if (unlikely(r != 0)) { > - gfx_v8_0_cp_compute_fini(adev); > - return r; > - } > - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, > - &mqd_gpu_addr); > - if (r) { > - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); > - gfx_v8_0_cp_compute_fini(adev); > - return r; > - } > - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); > + if (ring->mqd_obj == NULL) { > + r = amdgpu_bo_create(adev, > + sizeof(struct vi_mqd), > + PAGE_SIZE, true, > + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, > + NULL, &ring->mqd_obj); > if (r) { > - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); > - gfx_v8_0_cp_compute_fini(adev); > + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); > return r; > } > + } > > - /* init the mqd struct */ > - memset(buf, 0, sizeof(struct vi_mqd)); > - > - mqd = (struct vi_mqd *)buf; > - mqd->header = 0xC0310800; > - mqd->compute_pipelinestat_enable = 0x00000001; > - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; > - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; > - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; > - mqd->compute_misc_reserved = 0x00000003; > - > - mutex_lock(&adev->srbm_mutex); > - vi_srbm_select(adev, ring->me, > - ring->pipe, > - ring->queue, 0); > - > - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); > - eop_gpu_addr >>= 8; > - > - /* write the EOP addr */ > - WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); > - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); > - > - /* set the VMID assigned 
*/ > - WREG32(mmCP_HQD_VMID, 0); > - > - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > - tmp = RREG32(mmCP_HQD_EOP_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, > - (order_base_2(MEC_HPD_SIZE / 4) - 1)); > - WREG32(mmCP_HQD_EOP_CONTROL, tmp); > - > - /* disable wptr polling */ > - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); > - tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); > - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); > - > - mqd->cp_hqd_eop_base_addr_lo = > - RREG32(mmCP_HQD_EOP_BASE_ADDR); > - mqd->cp_hqd_eop_base_addr_hi = > - RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); > - > - /* enable doorbell? */ > - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > - if (use_doorbell) { > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); > - } else { > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); > - } > - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); > - mqd->cp_hqd_pq_doorbell_control = tmp; > - > - /* disable the queue if it's active */ > - mqd->cp_hqd_dequeue_request = 0; > - mqd->cp_hqd_pq_rptr = 0; > - mqd->cp_hqd_pq_wptr= 0; > - if (RREG32(mmCP_HQD_ACTIVE) & 1) { > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); > - for (j = 0; j < adev->usec_timeout; j++) { > - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) > - break; > - udelay(1); > - } > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); > - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); > - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > - } > + r = amdgpu_bo_reserve(ring->mqd_obj, false); > + if (unlikely(r != 0)) > + goto out; > > - /* set the pointer to the MQD */ > - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; > - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); > - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); > - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); > - > - /* set MQD vmid to 0 */ > - tmp = RREG32(mmCP_MQD_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); > - 
WREG32(mmCP_MQD_CONTROL, tmp); > - mqd->cp_mqd_control = tmp; > - > - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > - hqd_gpu_addr = ring->gpu_addr >> 8; > - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; > - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); > - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); > - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); > - > - /* set up the HQD, this is similar to CP_RB0_CNTL */ > - tmp = RREG32(mmCP_HQD_PQ_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, > - (order_base_2(ring->ring_size / 4) - 1)); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, > - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); > -#ifdef __BIG_ENDIAN > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); > -#endif > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); > - WREG32(mmCP_HQD_PQ_CONTROL, tmp); > - mqd->cp_hqd_pq_control = tmp; > - > - /* set the wb address wether it's enabled or not */ > - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); > - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; > - mqd->cp_hqd_pq_rptr_report_addr_hi = > - upper_32_bits(wb_gpu_addr) & 0xffff; > - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, > - mqd->cp_hqd_pq_rptr_report_addr_lo); > - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, > - mqd->cp_hqd_pq_rptr_report_addr_hi); > - > - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ > - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); > - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; > - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; > - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); > - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, > - 
mqd->cp_hqd_pq_wptr_poll_addr_hi); > - > - /* enable the doorbell if requested */ > - if (use_doorbell) { > - if ((adev->asic_type == CHIP_CARRIZO) || > - (adev->asic_type == CHIP_FIJI) || > - (adev->asic_type == CHIP_STONEY) || > - (adev->asic_type == CHIP_POLARIS11) || > - (adev->asic_type == CHIP_POLARIS10) || > - (adev->asic_type == CHIP_POLARIS12)) { > - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, > - AMDGPU_DOORBELL_KIQ << 2); > - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, > - AMDGPU_DOORBELL_MEC_RING7 << 2); > - } > - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > - DOORBELL_OFFSET, ring->doorbell_index); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); > - mqd->cp_hqd_pq_doorbell_control = tmp; > + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, > + &mqd_gpu_addr); > + if (r) { > + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); > + goto out_unreserve; > + } > + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); > + if (r) { > + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); > + goto out_unreserve; > + } > > - } else { > - mqd->cp_hqd_pq_doorbell_control = 0; > - } > - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, > - mqd->cp_hqd_pq_doorbell_control); > - > - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > - ring->wptr = 0; > - mqd->cp_hqd_pq_wptr = ring->wptr; > - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > - mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); > - > - /* set the vmid for the queue */ > - mqd->cp_hqd_vmid = 0; > - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); > - > - tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); > - WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); > - mqd->cp_hqd_persistent_state = tmp; > - if (adev->asic_type == 
CHIP_STONEY || > - adev->asic_type == CHIP_POLARIS11 || > - adev->asic_type == CHIP_POLARIS10 || > - adev->asic_type == CHIP_POLARIS12) { > - tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); > - tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); > - WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); > - } > + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE); > + eop_gpu_addr >>= 8; > + > + /* init the mqd struct */ > + memset(mqd, 0, sizeof(struct vi_mqd)); > + > + mutex_lock(&adev->srbm_mutex); > + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > + > + gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); > > - /* activate the queue */ > - mqd->cp_hqd_active = 1; > - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); > + gfx_v8_0_mqd_deactivate(adev); > + gfx_v8_0_enable_doorbell(adev, ring->use_doorbell); > + gfx_v8_0_mqd_commit(adev, mqd); > > - vi_srbm_select(adev, 0, 0, 0, 0); > - mutex_unlock(&adev->srbm_mutex); > + vi_srbm_select(adev, 0, 0, 0, 0); > + mutex_unlock(&adev->srbm_mutex); > + > + amdgpu_bo_kunmap(ring->mqd_obj); > +out_unreserve: > + amdgpu_bo_unreserve(ring->mqd_obj); > +out: > + return r; > +} > > - amdgpu_bo_kunmap(ring->mqd_obj); > - amdgpu_bo_unreserve(ring->mqd_obj); > +static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) > +{ > + int r, i; > + u32 tmp; > + struct amdgpu_ring *ring; > + > + /* Starting with gfxv8, all the pipe specific state was removed > + * The fields have been moved to be per-HQD now.
*/ > + > + /* init the queues */ > + for (i = 0; i < adev->gfx.num_compute_rings; i++) { > + r = gfx_v8_0_compute_queue_init(adev, i); > + if (r) { > + gfx_v8_0_cp_compute_fini(adev); > + return r; > + } > } > > - if (use_doorbell) { > - tmp = RREG32(mmCP_PQ_STATUS); > - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); > - WREG32(mmCP_PQ_STATUS, tmp); > + if (adev->asic_type == CHIP_STONEY || > + adev->asic_type == CHIP_POLARIS11 || > + adev->asic_type == CHIP_POLARIS10 || > + adev->asic_type == CHIP_POLARIS12) { > + tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); > + tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); > + WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); > } > > gfx_v8_0_cp_compute_enable(adev, true); > > for (i = 0; i < adev->gfx.num_compute_rings; i++) { > - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; > + ring = &adev->gfx.compute_ring[i]; > > ring->ready = true; > r = amdgpu_ring_test_ring(ring); > if (r) > ring->ready = false; > } > > return 0; > } > > -- > 2.9.3 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx