The MQD programming sequence currently exists in 3 different places. Refactor it to absorb all the duplicates. The success path remains mostly identical except for a slightly different order in the non-kiq case. This shouldn't matter if the HQD is disabled. The error handling paths have been updated to deal with the new code structure. Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++++++++++++++++++---------------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 417 +++++++++++-------------------- 2 files changed, 387 insertions(+), 477 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 1f93545..8e1e601 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -42,20 +42,22 @@ #include "gca/gfx_7_2_sh_mask.h" #include "gmc/gmc_7_0_d.h" #include "gmc/gmc_7_0_sh_mask.h" #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" #define GFX7_NUM_GFX_RINGS 1 #define GFX7_NUM_COMPUTE_RINGS 8 +#define GFX7_MEC_HPD_SIZE 2048 + static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); MODULE_FIRMWARE("radeon/bonaire_me.bin"); MODULE_FIRMWARE("radeon/bonaire_ce.bin"); MODULE_FIRMWARE("radeon/bonaire_rlc.bin"); MODULE_FIRMWARE("radeon/bonaire_mec.bin"); @@ -2792,40 +2794,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); adev->gfx.mec.hpd_eop_obj = NULL; } } -#define MEC_HPD_SIZE 2048 - static int gfx_v7_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; /* * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total * 
CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total * Nonetheless, we assign only 1 pipe because all other pipes will * be handled by KFD */ adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe = 1; adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, + adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r; } } r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); @@ -2841,21 +2841,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) return r; } r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); if (r) { dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); gfx_v7_0_mec_fini(adev); return r; } /* clear memory. Not sure if this is required or not */ - memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); + memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); return 0; } struct hqd_registers { u32 cp_mqd_base_addr; @@ -2916,261 +2916,296 @@ struct bonaire_mqd u32 restart[3]; u32 thread_trace_enable; u32 reserved1; u32 user_data[16]; u32 vgtcs_invoke_count[2]; struct hqd_registers queue_state; u32 dequeue_cntr; u32 interrupt_queue[64]; }; -/** - * gfx_v7_0_cp_compute_resume - setup the compute queue registers - * - * @adev: amdgpu_device pointer - * - * Program the compute queues and test them to make sure they - * are working. - * Returns 0 for success, error for failure. 
- */ -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int mec, int pipe) { - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct bonaire_mqd *mqd; - struct amdgpu_ring *ring; - - /* fix up chicken bits */ - tmp = RREG32(mmCP_CPF_DEBUG); - tmp |= (1 << 23); - WREG32(mmCP_CPF_DEBUG, tmp); + u32 tmp; + size_t eop_offset = (mec * adev->gfx.mec.num_pipe + pipe) * GFX7_MEC_HPD_SIZE * 2; /* one EOP slot per (mec, pipe) */ - /* init the pipes */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { - int me = (i < 4) ? 1 : 2; - int pipe = (i < 4) ? i : (i - 4); + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset; - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); + cik_srbm_select(adev, mec + 1, pipe, 0, 0); /* mec is 0-based; SRBM me is mec + 1, as in the replaced loop */ - cik_srbm_select(adev, me, pipe, 0, 0); + /* write the EOP addr */ + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); - /* write the EOP addr */ - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); + /* set the VMID assigned */ + WREG32(mmCP_HPD_EOP_VMID, 0); - /* set the VMID assigned */ - WREG32(mmCP_HPD_EOP_VMID, 0); + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(mmCP_HPD_EOP_CONTROL); + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8); + WREG32(mmCP_HPD_EOP_CONTROL, tmp); - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(mmCP_HPD_EOP_CONTROL); - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; - tmp |= order_base_2(MEC_HPD_SIZE / 8); - WREG32(mmCP_HPD_EOP_CONTROL, tmp); - } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); +} - /* init the queues. Just two for now. 
*/ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev) +{ + int i; - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct bonaire_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } + /* disable the queue if it's active */ + if (RREG32(mmCP_HQD_ACTIVE) & 1) { + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } + if (i == adev->usec_timeout) + return -ETIMEDOUT; - /* init the mqd struct */ - memset(buf, 0, sizeof(struct bonaire_mqd)); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); + } - mqd = (struct bonaire_mqd *)buf; - mqd->header = 0xC0310800; - mqd->static_thread_mgmt01[0] = 0xffffffff; - mqd->static_thread_mgmt01[1] = 0xffffffff; - mqd->static_thread_mgmt23[0] = 0xffffffff; - mqd->static_thread_mgmt23[1] = 0xffffffff; + return 0; +} - mutex_lock(&adev->srbm_mutex); - cik_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, + struct bonaire_mqd *mqd, + uint64_t mqd_gpu_addr, + struct amdgpu_ring *ring) +{ + u64 hqd_gpu_addr; + u64 wb_gpu_addr; - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp &= 
~CP_PQ_WPTR_POLL_CNTL__EN_MASK; - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + /* init the mqd struct */ + memset(mqd, 0, sizeof(struct bonaire_mqd)); - /* enable doorbell? */ - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - else - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); - - /* disable the queue if it's active */ - mqd->queue_state.cp_hqd_dequeue_request = 0; - mqd->queue_state.cp_hqd_pq_rptr = 0; - mqd->queue_state.cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - } + mqd->header = 0xC0310800; + mqd->static_thread_mgmt01[0] = 0xffffffff; + mqd->static_thread_mgmt01[1] = 0xffffffff; + mqd->static_thread_mgmt23[0] = 0xffffffff; + mqd->static_thread_mgmt23[1] = 0xffffffff; - /* set the pointer to the MQD */ - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); - /* set MQD vmid to 0 */ - mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->queue_state.cp_hqd_pq_base = 
hqd_gpu_addr; - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); - - mqd->queue_state.cp_hqd_pq_control |= - order_base_2(ring->ring_size / 8); - mqd->queue_state.cp_hqd_pq_control |= - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); + /* enable doorbell? */ + mqd->queue_state.cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + if (ring->use_doorbell) + mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + else + mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + + /* set the pointer to the MQD */ + mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; + mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + + /* set MQD vmid to 0 */ + mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); + mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; + mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); + mqd->queue_state.cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); + + mqd->queue_state.cp_hqd_pq_control |= + order_base_2(ring->ring_size / 8); + mqd->queue_state.cp_hqd_pq_control |= + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); #ifdef __BIG_ENDIAN - mqd->queue_state.cp_hqd_pq_control |= - 2 << 
CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; + mqd->queue_state.cp_hqd_pq_control |= + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; #endif - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | + mqd->queue_state.cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); - mqd->queue_state.cp_hqd_pq_control |= - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->queue_state.cp_hqd_pq_rptr_report_addr); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control |= - (ring->doorbell_index << - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); - mqd->queue_state.cp_hqd_pq_doorbell_control |= - 
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); + mqd->queue_state.cp_hqd_pq_control |= + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - } else { - mqd->queue_state.cp_hqd_pq_doorbell_control = 0; + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; + mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set the wb address wether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; + mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + mqd->queue_state.cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + mqd->queue_state.cp_hqd_pq_doorbell_control &= + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; + mqd->queue_state.cp_hqd_pq_doorbell_control |= + (ring->doorbell_index << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); + mqd->queue_state.cp_hqd_pq_doorbell_control |= + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + mqd->queue_state.cp_hqd_pq_doorbell_control &= + ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); + + } else { + mqd->queue_state.cp_hqd_pq_doorbell_control = 0; + } + + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; + mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->queue_state.cp_hqd_vmid = 0; + + /* activate the queue */ + 
mqd->queue_state.cp_hqd_active = 1; +} + +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, + struct bonaire_mqd *mqd) +{ + u32 tmp; + + /* disable wptr polling */ + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + + /* program MQD field to HW */ + WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); + WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); + WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); + WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); + WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); + WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control); + WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); + WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); + + /* activate the HQD */ + WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); + + return 0; +} + +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) +{ + int r; + u64 mqd_gpu_addr; + struct bonaire_mqd *mqd; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + if (ring->mqd_obj == NULL) { + r = amdgpu_bo_create(adev, + sizeof(struct bonaire_mqd), + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + &ring->mqd_obj); + if (r) { + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); + return r; } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 
0)) + goto out; + + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, + &mqd_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); + goto out_unreserve; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); + if (r) { + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); + goto out_unreserve; + } + + mutex_lock(&adev->srbm_mutex); + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); + gfx_v7_0_mqd_deactivate(adev); + gfx_v7_0_mqd_commit(adev, mqd); - /* set the vmid for the queue */ - mqd->queue_state.cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); + cik_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); - /* activate the queue */ - mqd->queue_state.cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); + amdgpu_bo_kunmap(ring->mqd_obj); +out_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +out: + return r; /* 0 on success, else the reserve/pin/kmap error */ +} + +/** + * gfx_v7_0_cp_compute_resume - setup the compute queue registers + * + * @adev: amdgpu_device pointer + * + * Program the compute queues and test them to make sure they + * are working. + * Returns 0 for success, error for failure. 
+ */ +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) +{ + int r, i, j; + u32 tmp; + struct amdgpu_ring *ring; - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + /* fix up chicken bits */ + tmp = RREG32(mmCP_CPF_DEBUG); + tmp |= (1 << 23); + WREG32(mmCP_CPF_DEBUG, tmp); - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); + /* init the pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; i++) + for (j = 0; j < adev->gfx.mec.num_pipe; j++) + gfx_v7_0_compute_pipe_init(adev, i, j); - ring->ready = true; + /* init the queues */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + r = gfx_v7_0_compute_queue_init(adev, i); + if (r) { + gfx_v7_0_cp_compute_fini(adev); + return r; + } } gfx_v7_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - + ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; } return 0; } static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 67afc90..1c8589a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -46,20 +46,22 @@ #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_enum.h" #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" #include "smu/smu_7_1_3_d.h" #define GFX8_NUM_GFX_RINGS 1 #define GFX8_NUM_COMPUTE_RINGS 8 +#define GFX8_MEC_HPD_SIZE 2048 + #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 
@@ -1409,38 +1411,38 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { if (amdgpu_sriov_vf(ring->adev)) amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); irq->data = NULL; } -#define MEC_HPD_SIZE 2048 +#define GFX8_MEC_HPD_SIZE 2048 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; /* * we assign only 1 pipe because all other pipes will * be handled by KFD */ adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe = 1; adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, + adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r; } } r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); @@ -1455,21 +1457,21 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) gfx_v8_0_mec_fini(adev); return r; } r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); if (r) { dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); gfx_v8_0_mec_fini(adev); return r; } - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); + memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); return 0; } static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -1477,29 +1479,29 @@ static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); kiq->eop_obj = NULL; } static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) { int r; u32 *hpd; struct amdgpu_kiq *kiq = &adev->gfx.kiq; - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, + r 
= amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, &kiq->eop_gpu_addr, (void **)&hpd); if (r) { dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); return r; } - memset(hpd, 0, MEC_HPD_SIZE); + memset(hpd, 0, GFX8_MEC_HPD_SIZE); amdgpu_bo_kunmap(kiq->eop_obj); return 0; } static const u32 vgpr_init_compute_shader[] = { 0x7e000209, 0x7e020208, 0x7e040207, 0x7e060206, @@ -4658,56 +4660,54 @@ static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, struct vi_mqd *mqd, uint64_t mqd_gpu_addr, uint64_t eop_gpu_addr, struct amdgpu_ring *ring) { uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; uint32_t tmp; + /* init the mqd struct */ + memset(mqd, 0, sizeof(struct vi_mqd)); + mqd->header = 0xC0310800; mqd->compute_pipelinestat_enable = 0x00000001; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; mqd->compute_static_thread_mgmt_se1 = 0xffffffff; mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; eop_base_addr = eop_gpu_addr >> 8; mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32(mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? 
*/ tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); if (ring->use_doorbell) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); else tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); mqd->cp_hqd_pq_doorbell_control = tmp; - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr = 0; - /* set the pointer to the MQD */ mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); /* set MQD vmid to 0 */ tmp = RREG32(mmCP_MQD_CONTROL); tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ @@ -4769,53 +4769,87 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; /* activate the queue */ mqd->cp_hqd_active = 1; return 0; } -static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, - struct vi_mqd *mqd, - struct amdgpu_ring *ring) +static int gfx_v8_0_mqd_deactivate(struct amdgpu_device *adev) +{ + int i; + + /* disable the queue if it's active */ + if (RREG32(mmCP_HQD_ACTIVE) & 1) { + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + + if (i == adev->usec_timeout) + return -ETIMEDOUT; + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); + } + + return 0; +} + +static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable) +{ + uint32_t tmp; + + if (!enable) + return; + + if ((adev->asic_type == CHIP_CARRIZO) || + (adev->asic_type == CHIP_FIJI) || + (adev->asic_type == CHIP_STONEY) || + (adev->asic_type == CHIP_POLARIS11) || + (adev->asic_type == CHIP_POLARIS10)) { + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 
AMDGPU_DOORBELL_KIQ << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); + } + + tmp = RREG32(mmCP_PQ_STATUS); + tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + WREG32(mmCP_PQ_STATUS, tmp); +} + +static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd) { uint32_t tmp; - int j; /* disable wptr polling */ tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? */ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); - /* disable the queue if it's active */ - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - } + /* set pq read/write pointers */ + WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); /* set the pointer to the MQD */ WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); @@ -4828,78 +4862,65 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 
mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ - if (ring->use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); - } - } WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); /* set the vmid for the queue */ WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - if (ring->use_doorbell) { - tmp = RREG32(mmCP_PQ_STATUS); - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - WREG32(mmCP_PQ_STATUS, tmp); - } - return 0; } -static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, +static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring, struct vi_mqd *mqd, u64 mqd_gpu_addr) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq; uint64_t eop_gpu_addr; bool is_kiq = false; if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) is_kiq = true; if (is_kiq) { eop_gpu_addr = kiq->eop_gpu_addr; gfx_v8_0_kiq_setting(&kiq->ring); } else eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + - ring->queue * MEC_HPD_SIZE; + ring->queue * GFX8_MEC_HPD_SIZE; mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); - if (is_kiq) - gfx_v8_0_kiq_init_register(adev, mqd, ring); + if (is_kiq) { + gfx_v8_0_mqd_deactivate(adev); + 
gfx_v8_0_enable_doorbell(adev, ring->use_doorbell); + gfx_v8_0_mqd_commit(adev, mqd); + } vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (is_kiq) gfx_v8_0_kiq_enable(ring); else gfx_v8_0_map_queue_enable(&kiq->ring, ring); return 0; @@ -4922,33 +4943,34 @@ static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev) } static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { struct vi_mqd *mqd; u64 mqd_gpu_addr; u32 *buf; int r = 0; - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &mqd_gpu_addr, (void **)&buf); + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &ring->mqd_obj, &mqd_gpu_addr, + (void **)&buf); if (r) { dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); return r; } /* init the mqd struct */ memset(buf, 0, sizeof(struct vi_mqd)); mqd = (struct vi_mqd *)buf; - r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr); + r = gfx_v8_0_kiq_queue_init(ring, mqd, mqd_gpu_addr); if (r) return r; amdgpu_bo_kunmap(ring->mqd_obj); return 0; } static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { @@ -4980,260 +5002,113 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) ring = &adev->gfx.kiq.ring; ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; return 0; } -static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) +static int gfx_v8_0_compute_queue_init(struct amdgpu_device *adev, + int ring_id) { - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; + int r; u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; + u64 mqd_gpu_addr; struct vi_mqd *mqd; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; - /* init the queues. 
*/ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct vi_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); + if (ring->mqd_obj == NULL) { + r = amdgpu_bo_create(adev, + sizeof(struct vi_mqd), + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + NULL, &ring->mqd_obj); if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); return r; } + } - /* init the mqd struct */ - memset(buf, 0, sizeof(struct vi_mqd)); - - mqd = (struct vi_mqd *)buf; - mqd->header = 0xC0310800; - mqd->compute_pipelinestat_enable = 0x00000001; - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - mqd->compute_misc_reserved = 0x00000003; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); - - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - eop_gpu_addr >>= 8; - - /* write the EOP addr */ - WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); - - /* set the VMID assigned */ - WREG32(mmCP_HQD_VMID, 0); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = 
RREG32(mmCP_HQD_EOP_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); - WREG32(mmCP_HQD_EOP_CONTROL, tmp); - - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); - - mqd->cp_hqd_eop_base_addr_lo = - RREG32(mmCP_HQD_EOP_BASE_ADDR); - mqd->cp_hqd_eop_base_addr_hi = - RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); - - /* enable doorbell? */ - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); - mqd->cp_hqd_pq_doorbell_control = tmp; - - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - } + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto out; - /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - tmp = RREG32(mmCP_MQD_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - WREG32(mmCP_MQD_CONTROL, tmp); - mqd->cp_mqd_control = tmp; - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; - mqd->cp_hqd_pq_base_hi = 
upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32(mmCP_HQD_PQ_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, - (order_base_2(ring->ring_size / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); -#ifdef __BIG_ENDIAN - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); -#endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - WREG32(mmCP_HQD_PQ_CONTROL, tmp); - mqd->cp_hqd_pq_control = tmp; - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == 
CHIP_POLARIS12)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); - } - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - mqd->cp_hqd_pq_doorbell_control = tmp; + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, + &mqd_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); + goto out_unreserve; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); + if (r) { + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); + goto out_unreserve; + } - } else { - mqd->cp_hqd_pq_doorbell_control = 0; - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->cp_hqd_pq_wptr = ring->wptr; - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); - - /* set the vmid for the queue */ - mqd->cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); - - tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); - WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); - mqd->cp_hqd_persistent_state = tmp; - if (adev->asic_type == CHIP_STONEY || - adev->asic_type == CHIP_POLARIS11 || - adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12) { - tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); - WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); - } + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE); + eop_gpu_addr >>= 8; + + /* init the mqd struct */ + memset(mqd, 
0, sizeof(struct vi_mqd)); + + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); - /* activate the queue */ - mqd->cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); + gfx_v8_0_mqd_deactivate(adev); + gfx_v8_0_enable_doorbell(adev, ring->use_doorbell); + gfx_v8_0_mqd_commit(adev, mqd); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + amdgpu_bo_kunmap(ring->mqd_obj); +out_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +out: + return r; +} - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); +static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) +{ + int r, i; + u32 tmp; + struct amdgpu_ring *ring; + + /* Starting with gfxv8, all the pipe specific state was removed. + * The fields have been moved to be per-HQD now. */ + + /* init the queues */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + r = gfx_v8_0_compute_queue_init(adev, i); + if (r) { + gfx_v8_0_cp_compute_fini(adev); + return r; + } } - if (use_doorbell) { - tmp = RREG32(mmCP_PQ_STATUS); - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - WREG32(mmCP_PQ_STATUS, tmp); + if (adev->asic_type == CHIP_STONEY || + adev->asic_type == CHIP_POLARIS11 || + adev->asic_type == CHIP_POLARIS10 || + adev->asic_type == CHIP_POLARIS12) { + tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); + WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); } gfx_v8_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + ring = &adev->gfx.compute_ring[i]; ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; } return 0; } -- 2.7.4