On 2017-04-11 06:08 PM, Alex Deucher wrote: > On Thu, Apr 6, 2017 at 2:21 AM, Andres Rodriguez <andresx7 at gmail.com> wrote: >> The MQD programming sequence currently exists in 3 different places. >> Refactor it to absorb all the duplicates. >> >> The success path remains mostly identical except for a slightly >> different order in the non-kiq case. This shouldn't matter if the HQD >> is disabled. >> >> The error handling paths have been updated to deal with the new code >> structure. >> >> v2: the non-kiq path for gfxv8 was dropped in the rebase >> >> Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net> >> Acked-by: Christian König <christian.koenig at amd.com> >> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> >> --- >> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++++++++++++++++++---------------- >> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 110 +++++---- >> 2 files changed, 309 insertions(+), 248 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> index 185cb31..f67ef58 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> @@ -42,20 +42,22 @@ >> #include "gca/gfx_7_2_sh_mask.h" >> >> #include "gmc/gmc_7_0_d.h" >> #include "gmc/gmc_7_0_sh_mask.h" >> >> #include "oss/oss_2_0_d.h" >> #include "oss/oss_2_0_sh_mask.h" >> >> #define GFX7_NUM_GFX_RINGS 1 >> #define GFX7_NUM_COMPUTE_RINGS 8 >> +#define GFX7_MEC_HPD_SIZE 2048 >> + > > Might want to split out the rename of this define into a separate > patch so it can be applied early. Could probably also split the gfx7 > and gfx8 changes into two patches so they can be applied separately > so gfx7 doesn't have to be beholden to the flux in gfx8 at > the moment. 
> Done >> >> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); >> static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); >> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); >> >> MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); >> MODULE_FIRMWARE("radeon/bonaire_me.bin"); >> MODULE_FIRMWARE("radeon/bonaire_ce.bin"); >> MODULE_FIRMWARE("radeon/bonaire_rlc.bin"); >> MODULE_FIRMWARE("radeon/bonaire_mec.bin"); >> @@ -2814,40 +2816,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) >> if (unlikely(r != 0)) >> dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); >> amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); >> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); >> >> amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); >> adev->gfx.mec.hpd_eop_obj = NULL; >> } >> } >> >> -#define MEC_HPD_SIZE 2048 >> - >> static int gfx_v7_0_mec_init(struct amdgpu_device *adev) >> { >> int r; >> u32 *hpd; >> >> /* >> * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total >> * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total >> * Nonetheless, we assign only 1 pipe because all other pipes will >> * be handled by KFD >> */ >> adev->gfx.mec.num_mec = 1; >> adev->gfx.mec.num_pipe = 1; >> adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; >> >> if (adev->gfx.mec.hpd_eop_obj == NULL) { >> r = amdgpu_bo_create(adev, >> - adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, >> + adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2, >> PAGE_SIZE, true, >> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> &adev->gfx.mec.hpd_eop_obj); >> if (r) { >> dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); >> return r; >> } >> } >> >> r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); >> @@ -2863,21 +2863,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) >> return r; >> } >> r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); >> if (r) { >> dev_warn(adev->dev, "(%d) 
map HDP EOP bo failed\n", r); >> gfx_v7_0_mec_fini(adev); >> return r; >> } >> >> /* clear memory. Not sure if this is required or not */ >> - memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); >> + memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2); >> >> amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); >> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); >> >> return 0; >> } >> >> struct hqd_registers >> { >> u32 cp_mqd_base_addr; >> @@ -2938,261 +2938,296 @@ struct bonaire_mqd >> u32 restart[3]; >> u32 thread_trace_enable; >> u32 reserved1; >> u32 user_data[16]; >> u32 vgtcs_invoke_count[2]; >> struct hqd_registers queue_state; >> u32 dequeue_cntr; >> u32 interrupt_queue[64]; >> }; >> >> -/** >> - * gfx_v7_0_cp_compute_resume - setup the compute queue registers >> - * >> - * @adev: amdgpu_device pointer >> - * >> - * Program the compute queues and test them to make sure they >> - * are working. >> - * Returns 0 for success, error for failure. >> - */ >> -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) >> +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe) >> { >> - int r, i, j; >> - u32 tmp; >> - bool use_doorbell = true; >> - u64 hqd_gpu_addr; >> - u64 mqd_gpu_addr; >> u64 eop_gpu_addr; >> - u64 wb_gpu_addr; >> - u32 *buf; >> - struct bonaire_mqd *mqd; >> - struct amdgpu_ring *ring; >> - >> - /* fix up chicken bits */ >> - tmp = RREG32(mmCP_CPF_DEBUG); >> - tmp |= (1 << 23); >> - WREG32(mmCP_CPF_DEBUG, tmp); >> + u32 tmp; >> + size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2; >> >> - /* init the pipes */ >> mutex_lock(&adev->srbm_mutex); >> - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { >> - int me = (i < 4) ? 1 : 2; >> - int pipe = (i < 4) ? 
i : (i - 4); >> + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset; >> >> - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); >> + cik_srbm_select(adev, me, pipe, 0, 0); >> >> - cik_srbm_select(adev, me, pipe, 0, 0); >> + /* write the EOP addr */ >> + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); >> + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); >> >> - /* write the EOP addr */ >> - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); >> - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); >> + /* set the VMID assigned */ >> + WREG32(mmCP_HPD_EOP_VMID, 0); >> >> - /* set the VMID assigned */ >> - WREG32(mmCP_HPD_EOP_VMID, 0); >> + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ >> + tmp = RREG32(mmCP_HPD_EOP_CONTROL); >> + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; >> + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8); >> + WREG32(mmCP_HPD_EOP_CONTROL, tmp); >> >> - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ >> - tmp = RREG32(mmCP_HPD_EOP_CONTROL); >> - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; >> - tmp |= order_base_2(MEC_HPD_SIZE / 8); >> - WREG32(mmCP_HPD_EOP_CONTROL, tmp); >> - } >> cik_srbm_select(adev, 0, 0, 0, 0); >> mutex_unlock(&adev->srbm_mutex); >> +} >> >> - /* init the queues. Just two for now. 
*/ >> - for (i = 0; i < adev->gfx.num_compute_rings; i++) { >> - ring = &adev->gfx.compute_ring[i]; >> +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev) >> +{ >> + int i; >> >> - if (ring->mqd_obj == NULL) { >> - r = amdgpu_bo_create(adev, >> - sizeof(struct bonaire_mqd), >> - PAGE_SIZE, true, >> - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> - &ring->mqd_obj); >> - if (r) { >> - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); >> - return r; >> - } >> + /* disable the queue if it's active */ >> + if (RREG32(mmCP_HQD_ACTIVE) & 1) { >> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); >> + for (i = 0; i < adev->usec_timeout; i++) { >> + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) >> + break; >> + udelay(1); >> } >> >> - r = amdgpu_bo_reserve(ring->mqd_obj, false); >> - if (unlikely(r != 0)) { >> - gfx_v7_0_cp_compute_fini(adev); >> - return r; >> - } >> - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, >> - &mqd_gpu_addr); >> - if (r) { >> - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); >> - gfx_v7_0_cp_compute_fini(adev); >> - return r; >> - } >> - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); >> - if (r) { >> - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); >> - gfx_v7_0_cp_compute_fini(adev); >> - return r; >> - } >> + if (i == adev->usec_timeout) >> + return -ETIMEDOUT; >> >> - /* init the mqd struct */ >> - memset(buf, 0, sizeof(struct bonaire_mqd)); >> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); >> + WREG32(mmCP_HQD_PQ_RPTR, 0); >> + WREG32(mmCP_HQD_PQ_WPTR, 0); >> + } >> >> - mqd = (struct bonaire_mqd *)buf; >> - mqd->header = 0xC0310800; >> - mqd->static_thread_mgmt01[0] = 0xffffffff; >> - mqd->static_thread_mgmt01[1] = 0xffffffff; >> - mqd->static_thread_mgmt23[0] = 0xffffffff; >> - mqd->static_thread_mgmt23[1] = 0xffffffff; >> + return 0; >> +} >> >> - mutex_lock(&adev->srbm_mutex); >> - cik_srbm_select(adev, ring->me, >> - ring->pipe, >> - ring->queue, 0); >> +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, >> + struct bonaire_mqd 
*mqd, >> + uint64_t mqd_gpu_addr, >> + struct amdgpu_ring *ring) >> +{ >> + u64 hqd_gpu_addr; >> + u64 wb_gpu_addr; >> >> - /* disable wptr polling */ >> - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); >> - tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK; >> - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); >> + /* init the mqd struct */ >> + memset(mqd, 0, sizeof(struct bonaire_mqd)); >> >> - /* enable doorbell? */ >> - mqd->queue_state.cp_hqd_pq_doorbell_control = >> - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); >> - if (use_doorbell) >> - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; >> - else >> - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; >> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, >> - mqd->queue_state.cp_hqd_pq_doorbell_control); >> - >> - /* disable the queue if it's active */ >> - mqd->queue_state.cp_hqd_dequeue_request = 0; >> - mqd->queue_state.cp_hqd_pq_rptr = 0; >> - mqd->queue_state.cp_hqd_pq_wptr= 0; >> - if (RREG32(mmCP_HQD_ACTIVE) & 1) { >> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); >> - for (j = 0; j < adev->usec_timeout; j++) { >> - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) >> - break; >> - udelay(1); >> - } >> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); >> - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); >> - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); >> - } >> + mqd->header = 0xC0310800; >> + mqd->static_thread_mgmt01[0] = 0xffffffff; >> + mqd->static_thread_mgmt01[1] = 0xffffffff; >> + mqd->static_thread_mgmt23[0] = 0xffffffff; >> + mqd->static_thread_mgmt23[1] = 0xffffffff; >> >> - /* set the pointer to the MQD */ >> - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; >> - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); >> - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); >> - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); >> - /* set MQD vmid to 0 */ >> - 
mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); >> - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; >> - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); >> - >> - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ >> - hqd_gpu_addr = ring->gpu_addr >> 8; >> - mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; >> - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); >> - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); >> - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); >> - >> - /* set up the HQD, this is similar to CP_RB0_CNTL */ >> - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); >> - mqd->queue_state.cp_hqd_pq_control &= >> - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | >> - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); >> - >> - mqd->queue_state.cp_hqd_pq_control |= >> - order_base_2(ring->ring_size / 8); >> - mqd->queue_state.cp_hqd_pq_control |= >> - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); >> + /* enable doorbell? 
*/ >> + mqd->queue_state.cp_hqd_pq_doorbell_control = >> + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); >> + if (ring->use_doorbell) >> + mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; >> + else >> + mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; >> + >> + /* set the pointer to the MQD */ >> + mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; >> + mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); >> + >> + /* set MQD vmid to 0 */ >> + mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); >> + mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; >> + >> + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ >> + hqd_gpu_addr = ring->gpu_addr >> 8; >> + mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; >> + mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); >> + >> + /* set up the HQD, this is similar to CP_RB0_CNTL */ >> + mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); >> + mqd->queue_state.cp_hqd_pq_control &= >> + ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | >> + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); >> + >> + mqd->queue_state.cp_hqd_pq_control |= >> + order_base_2(ring->ring_size / 8); >> + mqd->queue_state.cp_hqd_pq_control |= >> + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); >> #ifdef __BIG_ENDIAN >> - mqd->queue_state.cp_hqd_pq_control |= >> - 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; >> + mqd->queue_state.cp_hqd_pq_control |= >> + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; >> #endif >> - mqd->queue_state.cp_hqd_pq_control &= >> - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | >> + mqd->queue_state.cp_hqd_pq_control &= >> + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | >> CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | >> CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); >> - mqd->queue_state.cp_hqd_pq_control |= >> - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | >> - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming 
kernel queue control */ >> - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); >> - >> - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ >> - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); >> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; >> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; >> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); >> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, >> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); >> - >> - /* set the wb address wether it's enabled or not */ >> - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); >> - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; >> - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = >> - upper_32_bits(wb_gpu_addr) & 0xffff; >> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, >> - mqd->queue_state.cp_hqd_pq_rptr_report_addr); >> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, >> - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); >> - >> - /* enable the doorbell if requested */ >> - if (use_doorbell) { >> - mqd->queue_state.cp_hqd_pq_doorbell_control = >> - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); >> - mqd->queue_state.cp_hqd_pq_doorbell_control &= >> - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; >> - mqd->queue_state.cp_hqd_pq_doorbell_control |= >> - (ring->doorbell_index << >> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); >> - mqd->queue_state.cp_hqd_pq_doorbell_control |= >> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; >> - mqd->queue_state.cp_hqd_pq_doorbell_control &= >> - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | >> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); >> + mqd->queue_state.cp_hqd_pq_control |= >> + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | >> + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ >> >> - } else { >> - mqd->queue_state.cp_hqd_pq_doorbell_control = 0; >> + /* only 
used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ >> + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); >> + mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; >> + mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; >> + >> + /* set the wb address wether it's enabled or not */ >> + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); >> + mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; >> + mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = >> + upper_32_bits(wb_gpu_addr) & 0xffff; >> + >> + /* enable the doorbell if requested */ >> + if (ring->use_doorbell) { >> + mqd->queue_state.cp_hqd_pq_doorbell_control = >> + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); >> + mqd->queue_state.cp_hqd_pq_doorbell_control &= >> + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; >> + mqd->queue_state.cp_hqd_pq_doorbell_control |= >> + (ring->doorbell_index << >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); >> + mqd->queue_state.cp_hqd_pq_doorbell_control |= >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; >> + mqd->queue_state.cp_hqd_pq_doorbell_control &= >> + ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); >> + >> + } else { >> + mqd->queue_state.cp_hqd_pq_doorbell_control = 0; >> + } >> + >> + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ >> + ring->wptr = 0; >> + mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); >> + mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); >> + >> + /* set the vmid for the queue */ >> + mqd->queue_state.cp_hqd_vmid = 0; >> + >> + /* activate the queue */ >> + mqd->queue_state.cp_hqd_active = 1; >> +} >> + >> +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, >> + struct bonaire_mqd *mqd) >> +{ >> + u32 tmp; >> + >> + /* disable wptr polling */ >> + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); >> + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 
>> + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); >> + >> + /* program MQD field to HW */ >> + WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); >> + WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); >> + WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); >> + WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); >> + WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); >> + WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); >> + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); >> + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); >> + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr); >> + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); >> + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control); >> + WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); >> + WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); >> + >> + /* activate the HQD */ >> + WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); >> + >> + return 0; >> +} >> + >> +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) >> +{ >> + int r; >> + u64 mqd_gpu_addr; >> + struct bonaire_mqd *mqd; >> + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; >> + >> + if (ring->mqd_obj == NULL) { >> + r = amdgpu_bo_create(adev, >> + sizeof(struct bonaire_mqd), >> + PAGE_SIZE, true, >> + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> + &ring->mqd_obj); >> + if (r) { >> + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); >> + return r; >> } >> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, >> - mqd->queue_state.cp_hqd_pq_doorbell_control); >> + } >> + >> + r = amdgpu_bo_reserve(ring->mqd_obj, false); >> + if (unlikely(r != 0)) >> + goto out; >> + >> + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, >> + &mqd_gpu_addr); >> + if (r) { >> + 
dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); >> + goto out_unreserve; >> + } >> + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); >> + if (r) { >> + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); >> + goto out_unreserve; >> + } >> + >> + mutex_lock(&adev->srbm_mutex); >> + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); >> >> - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ >> - ring->wptr = 0; >> - mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); >> - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); >> - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); >> + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); >> + gfx_v7_0_mqd_deactivate(adev); >> + gfx_v7_0_mqd_commit(adev, mqd); >> >> - /* set the vmid for the queue */ >> - mqd->queue_state.cp_hqd_vmid = 0; >> - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); >> + cik_srbm_select(adev, 0, 0, 0, 0); >> + mutex_unlock(&adev->srbm_mutex); >> >> - /* activate the queue */ >> - mqd->queue_state.cp_hqd_active = 1; >> - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); >> + amdgpu_bo_kunmap(ring->mqd_obj); >> +out_unreserve: >> + amdgpu_bo_unreserve(ring->mqd_obj); >> +out: >> + return 0; >> +} >> + >> +/** >> + * gfx_v7_0_cp_compute_resume - setup the compute queue registers >> + * >> + * @adev: amdgpu_device pointer >> + * >> + * Program the compute queues and test them to make sure they >> + * are working. >> + * Returns 0 for success, error for failure. 
>> + */ >> +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) >> +{ >> + int r, i, j; >> + u32 tmp; >> + struct amdgpu_ring *ring; >> >> - cik_srbm_select(adev, 0, 0, 0, 0); >> - mutex_unlock(&adev->srbm_mutex); >> + /* fix up chicken bits */ >> + tmp = RREG32(mmCP_CPF_DEBUG); >> + tmp |= (1 << 23); >> + WREG32(mmCP_CPF_DEBUG, tmp); >> >> - amdgpu_bo_kunmap(ring->mqd_obj); >> - amdgpu_bo_unreserve(ring->mqd_obj); >> + /* init the pipes */ >> + for (i = 0; i < adev->gfx.mec.num_mec; i++) >> + for (j = 0; j < adev->gfx.mec.num_pipe; j++) >> + gfx_v7_0_compute_pipe_init(adev, i, j); >> >> - ring->ready = true; >> + /* init the queues */ >> + for (i = 0; i < adev->gfx.num_compute_rings; i++) { >> + r = gfx_v7_0_compute_queue_init(adev, i); >> + if (r) { >> + gfx_v7_0_cp_compute_fini(adev); >> + return r; >> + } >> } >> >> gfx_v7_0_cp_compute_enable(adev, true); >> >> for (i = 0; i < adev->gfx.num_compute_rings; i++) { >> ring = &adev->gfx.compute_ring[i]; >> - >> + ring->ready = true; >> r = amdgpu_ring_test_ring(ring); >> if (r) >> ring->ready = false; >> } >> >> return 0; >> } >> >> static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable) >> { >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> index 4e63e52..88b85f7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> @@ -46,20 +46,22 @@ >> #include "gca/gfx_8_0_sh_mask.h" >> #include "gca/gfx_8_0_enum.h" >> >> #include "dce/dce_10_0_d.h" >> #include "dce/dce_10_0_sh_mask.h" >> >> #include "smu/smu_7_1_3_d.h" >> >> #define GFX8_NUM_GFX_RINGS 1 >> #define GFX8_NUM_COMPUTE_RINGS 8 >> +#define GFX8_MEC_HPD_SIZE 2048 >> + >> >> #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 >> #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 >> #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 >> #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 >> >> #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 
>> #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) >> #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) >> #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) >> @@ -1406,38 +1408,38 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, >> >> return r; >> } >> static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, >> struct amdgpu_irq_src *irq) >> { >> amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); >> amdgpu_ring_fini(ring); >> } >> >> -#define MEC_HPD_SIZE 2048 >> +#define GFX8_MEC_HPD_SIZE 2048 > > Same comment about the rename of this. > Done >> >> static int gfx_v8_0_mec_init(struct amdgpu_device *adev) >> { >> int r; >> u32 *hpd; >> >> /* >> * we assign only 1 pipe because all other pipes will >> * be handled by KFD >> */ >> adev->gfx.mec.num_mec = 1; >> adev->gfx.mec.num_pipe = 1; >> adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; >> >> if (adev->gfx.mec.hpd_eop_obj == NULL) { >> r = amdgpu_bo_create(adev, >> - adev->gfx.mec.num_queue * MEC_HPD_SIZE, >> + adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE, >> PAGE_SIZE, true, >> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> &adev->gfx.mec.hpd_eop_obj); >> if (r) { >> dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); >> return r; >> } >> } >> >> r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); >> @@ -1452,50 +1454,50 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) >> gfx_v8_0_mec_fini(adev); >> return r; >> } >> r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); >> if (r) { >> dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); >> gfx_v8_0_mec_fini(adev); >> return r; >> } >> >> - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); >> + memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE); >> >> amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); >> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); >> >> return 0; >> } >> >> static void 
gfx_v8_0_kiq_fini(struct amdgpu_device *adev) >> { >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> >> amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); >> } >> >> static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) >> { >> int r; >> u32 *hpd; >> struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> >> - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, >> + r = amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE, >> AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, >> &kiq->eop_gpu_addr, (void **)&hpd); >> if (r) { >> dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); >> return r; >> } >> >> - memset(hpd, 0, MEC_HPD_SIZE); >> + memset(hpd, 0, GFX8_MEC_HPD_SIZE); >> >> r = amdgpu_bo_reserve(kiq->eop_obj, false); >> if (unlikely(r != 0)) >> dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); >> amdgpu_bo_kunmap(kiq->eop_obj); >> amdgpu_bo_unreserve(kiq->eop_obj); >> >> return 0; >> } >> >> @@ -2148,21 +2150,21 @@ static int gfx_v8_0_sw_init(void *handle) >> DRM_ERROR("Too many (%d) compute rings!\n", i); >> break; >> } >> ring = &adev->gfx.compute_ring[i]; >> ring->ring_obj = NULL; >> ring->use_doorbell = true; >> ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; >> ring->me = 1; /* first MEC */ >> ring->pipe = i / 8; >> ring->queue = i % 8; >> - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); >> + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE); >> sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); >> irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; >> /* type-2 packets are deprecated on MEC, use type-3 instead */ >> r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, >> irq_type); >> if (r) >> return r; >> } >> >> r = gfx_v8_0_kiq_init(adev); >> @@ -4747,52 +4749,50 @@ static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev) >> return r; >> } >> >> static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) >> { >> struct amdgpu_device 
*adev = ring->adev; >> struct vi_mqd *mqd = ring->mqd_ptr; >> uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; >> uint32_t tmp; >> >> + /* init the mqd struct */ >> + memset(mqd, 0, sizeof(struct vi_mqd)); >> + >> mqd->header = 0xC0310800; >> mqd->compute_pipelinestat_enable = 0x00000001; >> mqd->compute_static_thread_mgmt_se0 = 0xffffffff; >> mqd->compute_static_thread_mgmt_se1 = 0xffffffff; >> mqd->compute_static_thread_mgmt_se2 = 0xffffffff; >> mqd->compute_static_thread_mgmt_se3 = 0xffffffff; >> mqd->compute_misc_reserved = 0x00000003; >> >> eop_base_addr = ring->eop_gpu_addr >> 8; >> mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; >> mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); >> >> /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ >> tmp = RREG32(mmCP_HQD_EOP_CONTROL); >> tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, >> - (order_base_2(MEC_HPD_SIZE / 4) - 1)); >> + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); >> >> mqd->cp_hqd_eop_control = tmp; >> >> /* enable doorbell? */ >> tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), >> CP_HQD_PQ_DOORBELL_CONTROL, >> DOORBELL_EN, >> ring->use_doorbell ? 
1 : 0); >> >> mqd->cp_hqd_pq_doorbell_control = tmp; >> >> - /* disable the queue if it's active */ >> - mqd->cp_hqd_dequeue_request = 0; >> - mqd->cp_hqd_pq_rptr = 0; >> - mqd->cp_hqd_pq_wptr = 0; >> - >> /* set the pointer to the MQD */ >> mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; >> mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); >> >> /* set MQD vmid to 0 */ >> tmp = RREG32(mmCP_MQD_CONTROL); >> tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); >> mqd->cp_mqd_control = tmp; >> >> /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ >> @@ -4854,50 +4854,86 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) >> tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); >> tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); >> mqd->cp_hqd_persistent_state = tmp; >> >> /* activate the queue */ >> mqd->cp_hqd_active = 1; >> >> return 0; >> } >> >> -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) >> +static int gfx_v8_0_mqd_deactivate(struct amdgpu_device *adev) >> +{ >> + int i; >> + >> + /* disable the queue if it's active */ >> + if (RREG32(mmCP_HQD_ACTIVE) & 1) { >> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); >> + for (i = 0; i < adev->usec_timeout; i++) { >> + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) >> + break; >> + udelay(1); >> + } >> + >> + if (i == adev->usec_timeout) >> + return -ETIMEDOUT; >> + >> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); >> + WREG32(mmCP_HQD_PQ_RPTR, 0); >> + WREG32(mmCP_HQD_PQ_WPTR, 0); >> + } >> + >> + return 0; >> +} >> + >> +static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable) >> +{ >> + uint32_t tmp; >> + >> + if (!enable) >> + return; >> + >> + if ((adev->asic_type == CHIP_CARRIZO) || >> + (adev->asic_type == CHIP_FIJI) || >> + (adev->asic_type == CHIP_STONEY) || >> + (adev->asic_type == CHIP_POLARIS11) || >> + (adev->asic_type == CHIP_POLARIS10)) { >> + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); >> + 
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); >> + } >> + >> + tmp = RREG32(mmCP_PQ_STATUS); >> + tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); >> + WREG32(mmCP_PQ_STATUS, tmp); >> +} > > This can be dropped since we've already split out the doorbell range > setting and with KIQ enabled, these registers are only set once > anyway. No need to split it out. > Done. From my understanding of your comment + the code, this just needs to be removed. >> + >> +static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring) >> { >> struct amdgpu_device *adev = ring->adev; >> struct vi_mqd *mqd = ring->mqd_ptr; >> - int j; >> >> /* disable wptr polling */ >> WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); >> >> WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); >> WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); >> >> /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ >> WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); >> >> /* enable doorbell? 
*/ >> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); >> >> - /* disable the queue if it's active */ >> - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { >> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); >> - for (j = 0; j < adev->usec_timeout; j++) { >> - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) >> - break; >> - udelay(1); >> - } >> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); >> - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); >> - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); >> - } >> + /* set pq read/write pointers */ >> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); >> + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); >> + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); >> >> /* set the pointer to the MQD */ >> WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); >> WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); >> >> /* set MQD vmid to 0 */ >> WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); >> >> /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ >> WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); >> @@ -4910,46 +4946,33 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) >> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, >> mqd->cp_hqd_pq_rptr_report_addr_lo); >> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, >> mqd->cp_hqd_pq_rptr_report_addr_hi); >> >> /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ >> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); >> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); >> >> /* enable the doorbell if requested */ >> - if (ring->use_doorbell) { >> - if ((adev->asic_type == CHIP_CARRIZO) || >> - (adev->asic_type == CHIP_FIJI) || >> - (adev->asic_type == CHIP_STONEY)) { >> - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, >> - AMDGPU_DOORBELL_KIQ << 2); >> - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, >> - AMDGPU_DOORBELL_MEC_RING7 << 2); >> - } >> - } >> 
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); >> >> /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ >> WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); >> >> /* set the vmid for the queue */ >> WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); >> >> WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); >> >> /* activate the queue */ >> WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); >> >> - if (ring->use_doorbell) >> - WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); >> - >> return 0; >> } >> >> static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) >> { >> struct amdgpu_device *adev = ring->adev; >> struct vi_mqd *mqd = ring->mqd_ptr; >> int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; >> >> gfx_v8_0_kiq_setting(ring); >> @@ -4958,29 +4981,32 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) >> /* reset MQD to a clean status */ >> if (adev->gfx.mec.mqd_backup[mqd_idx]) >> memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); >> >> /* reset ring buffer */ >> ring->wptr = 0; >> amdgpu_ring_clear_ring(ring); >> >> mutex_lock(&adev->srbm_mutex); >> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); >> - gfx_v8_0_kiq_init_register(ring); >> + gfx_v8_0_mqd_deactivate(adev); >> + gfx_v8_0_enable_doorbell(adev, ring->use_doorbell); >> + gfx_v8_0_mqd_commit(ring); >> vi_srbm_select(adev, 0, 0, 0, 0); >> mutex_unlock(&adev->srbm_mutex); >> } else { >> - memset((void *)mqd, 0, sizeof(*mqd)); > > Since you memset the mqd in gfx_v8_0_mqd_init(), you can also drop the > memset in gfx_v8_0_kcq_init_queue(). 
> Done >> mutex_lock(&adev->srbm_mutex); >> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); >> gfx_v8_0_mqd_init(ring); >> - gfx_v8_0_kiq_init_register(ring); >> + gfx_v8_0_mqd_deactivate(adev); >> + gfx_v8_0_enable_doorbell(adev, ring->use_doorbell); >> + gfx_v8_0_mqd_commit(ring); >> vi_srbm_select(adev, 0, 0, 0, 0); >> mutex_unlock(&adev->srbm_mutex); >> >> if (adev->gfx.mec.mqd_backup[mqd_idx]) >> memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); >> } >> >> return 0; >> } >> >> -- >> 2.9.3 >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx