On Wed, Jun 26, 2013 at 09:22:12AM -0400, alexdeucher@xxxxxxxxx wrote: > From: Alex Deucher <alexander.deucher@xxxxxxx> > > On CIK, the compute rings work slightly differently than > on previous asics, however the basic concepts are the same. > > The main differences: > - New MEC engines for compute queues > - Multiple queues per MEC: > - CI/KB: 1 MEC, 4 pipes per MEC, 8 queues per pipe = 32 queues > - KV: 2 MEC, 4 pipes per MEC, 8 queues per pipe = 64 queues > - Queues can be allocated and scheduled by another queue > - New doorbell aperture allows you to assign space in the aperture > for the wptr which allows for userspace access to queues > > v2: add wptr shadow, fix eop setup > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> Minor nitpick below otherwise Reviewed-by: Jerome Glisse <jglisse@xxxxxxxxxx> > --- > drivers/gpu/drm/radeon/cik.c | 528 +++++++++++++++++++++++++++++++++++- > drivers/gpu/drm/radeon/cikd.h | 62 +++++ > drivers/gpu/drm/radeon/radeon.h | 19 ++ > drivers/gpu/drm/radeon/radeon_cs.c | 4 +- > 4 files changed, 601 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c > index 5c28fa5..9d2d6bb 100644 > --- a/drivers/gpu/drm/radeon/cik.c > +++ b/drivers/gpu/drm/radeon/cik.c > @@ -1687,6 +1687,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) > radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); > radeon_ring_write(ring, 0xDEADBEEF); > radeon_ring_unlock_commit(rdev, ring); > + > for (i = 0; i < rdev->usec_timeout; i++) { > tmp = RREG32(scratch); > if (tmp == 0xDEADBEEF) > @@ -2112,6 +2113,51 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev) > return 0; > } > > +static u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, > + struct radeon_ring *ring) > +{ > + u32 rptr; > + > + > + > + if (rdev->wb.enabled) { > + rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); > + } else { > + cik_srbm_select(rdev, ring->me, ring->pipe, 
ring->queue, 0); > + rptr = RREG32(CP_HQD_PQ_RPTR); > + cik_srbm_select(rdev, 0, 0, 0, 0); > + } > + rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; > + > + return rptr; > +} > + > +static u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, > + struct radeon_ring *ring) > +{ > + u32 wptr; > + > + if (rdev->wb.enabled) { > + wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]); > + } else { > + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); > + wptr = RREG32(CP_HQD_PQ_WPTR); > + cik_srbm_select(rdev, 0, 0, 0, 0); > + } > + wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; > + > + return wptr; > +} > + > +static void cik_compute_ring_set_wptr(struct radeon_device *rdev, > + struct radeon_ring *ring) > +{ > + u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask; > + > + rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr); > + WDOORBELL32(ring->doorbell_offset, wptr); > +} > + > /** > * cik_cp_compute_enable - enable/disable the compute CP MEs > * > @@ -2176,7 +2222,8 @@ static int cik_cp_compute_load_microcode(struct radeon_device *rdev) > */ > static int cik_cp_compute_start(struct radeon_device *rdev) > { > - //todo > + cik_cp_compute_enable(rdev, true); > + > return 0; > } > > @@ -2190,10 +2237,171 @@ static int cik_cp_compute_start(struct radeon_device *rdev) > */ > static void cik_cp_compute_fini(struct radeon_device *rdev) > { > + int i, idx, r; > + > cik_cp_compute_enable(rdev, false); > - //todo > + > + for (i = 0; i < 2; i++) { > + if (i == 0) > + idx = CAYMAN_RING_TYPE_CP1_INDEX; > + else > + idx = CAYMAN_RING_TYPE_CP2_INDEX; > + > + if (rdev->ring[idx].mqd_obj) { > + r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); > + if (unlikely(r != 0)) > + dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r); > + > + radeon_bo_unpin(rdev->ring[idx].mqd_obj); > + radeon_bo_unreserve(rdev->ring[idx].mqd_obj); > + > + radeon_bo_unref(&rdev->ring[idx].mqd_obj); > + rdev->ring[idx].mqd_obj = NULL; > + } > + } > +} 
> + > +static void cik_mec_fini(struct radeon_device *rdev) > +{ > + int r; > + > + if (rdev->mec.hpd_eop_obj) { > + r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); > + if (unlikely(r != 0)) > + dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r); > + radeon_bo_unpin(rdev->mec.hpd_eop_obj); > + radeon_bo_unreserve(rdev->mec.hpd_eop_obj); > + > + radeon_bo_unref(&rdev->mec.hpd_eop_obj); > + rdev->mec.hpd_eop_obj = NULL; > + } > +} > + > +#define MEC_HPD_SIZE 2048 > + > +static int cik_mec_init(struct radeon_device *rdev) > +{ > + int r; > + u32 *hpd; > + > + /* > + * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total > + * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total > + */ > + if (rdev->family == CHIP_KAVERI) > + rdev->mec.num_mec = 2; > + else > + rdev->mec.num_mec = 1; > + rdev->mec.num_pipe = 4; > + rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8; > + > + if (rdev->mec.hpd_eop_obj == NULL) { > + r = radeon_bo_create(rdev, > + rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2, > + PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_GTT, NULL, > + &rdev->mec.hpd_eop_obj); > + if (r) { > + dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r); > + return r; > + } > + } > + > + r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); > + if (unlikely(r != 0)) { > + cik_mec_fini(rdev); > + return r; > + } > + r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT, > + &rdev->mec.hpd_eop_gpu_addr); > + if (r) { > + dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r); > + cik_mec_fini(rdev); > + return r; > + } > + r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd); > + if (r) { > + dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r); > + cik_mec_fini(rdev); > + return r; > + } > + > + /* clear memory. 
Not sure if this is required or not */ > + memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2); > + > + radeon_bo_kunmap(rdev->mec.hpd_eop_obj); > + radeon_bo_unreserve(rdev->mec.hpd_eop_obj); > + > + return 0; > } > > +struct hqd_registers > +{ > + u32 cp_mqd_base_addr; > + u32 cp_mqd_base_addr_hi; > + u32 cp_hqd_active; > + u32 cp_hqd_vmid; > + u32 cp_hqd_persistent_state; > + u32 cp_hqd_pipe_priority; > + u32 cp_hqd_queue_priority; > + u32 cp_hqd_quantum; > + u32 cp_hqd_pq_base; > + u32 cp_hqd_pq_base_hi; > + u32 cp_hqd_pq_rptr; > + u32 cp_hqd_pq_rptr_report_addr; > + u32 cp_hqd_pq_rptr_report_addr_hi; > + u32 cp_hqd_pq_wptr_poll_addr; > + u32 cp_hqd_pq_wptr_poll_addr_hi; > + u32 cp_hqd_pq_doorbell_control; > + u32 cp_hqd_pq_wptr; > + u32 cp_hqd_pq_control; > + u32 cp_hqd_ib_base_addr; > + u32 cp_hqd_ib_base_addr_hi; > + u32 cp_hqd_ib_rptr; > + u32 cp_hqd_ib_control; > + u32 cp_hqd_iq_timer; > + u32 cp_hqd_iq_rptr; > + u32 cp_hqd_dequeue_request; > + u32 cp_hqd_dma_offload; > + u32 cp_hqd_sema_cmd; > + u32 cp_hqd_msg_type; > + u32 cp_hqd_atomic0_preop_lo; > + u32 cp_hqd_atomic0_preop_hi; > + u32 cp_hqd_atomic1_preop_lo; > + u32 cp_hqd_atomic1_preop_hi; > + u32 cp_hqd_hq_scheduler0; > + u32 cp_hqd_hq_scheduler1; > + u32 cp_mqd_control; > +}; > + > +struct bonaire_mqd > +{ > + u32 header; > + u32 dispatch_initiator; > + u32 dimensions[3]; > + u32 start_idx[3]; > + u32 num_threads[3]; > + u32 pipeline_stat_enable; > + u32 perf_counter_enable; > + u32 pgm[2]; > + u32 tba[2]; > + u32 tma[2]; > + u32 pgm_rsrc[2]; > + u32 vmid; > + u32 resource_limits; > + u32 static_thread_mgmt01[2]; > + u32 tmp_ring_size; > + u32 static_thread_mgmt23[2]; > + u32 restart[3]; > + u32 thread_trace_enable; > + u32 reserved1; > + u32 user_data[16]; > + u32 vgtcs_invoke_count[2]; > + struct hqd_registers queue_state; > + u32 dequeue_cntr; > + u32 interrupt_queue[64]; > +}; > + > /** > * cik_cp_compute_resume - setup the compute queue registers > * > @@ -2205,24 
+2413,247 @@ static void cik_cp_compute_fini(struct radeon_device *rdev) > */ > static int cik_cp_compute_resume(struct radeon_device *rdev) > { > - int r; > + int r, i, idx; > + u32 tmp; > + bool use_doorbell = true; > + u64 hqd_gpu_addr; > + u64 mqd_gpu_addr; > + u64 eop_gpu_addr; > + u64 wb_gpu_addr; > + u32 *buf; > + struct bonaire_mqd *mqd; > > - //todo > r = cik_cp_compute_start(rdev); > if (r) > return r; > + > + /* fix up chicken bits */ > + tmp = RREG32(CP_CPF_DEBUG); > + tmp |= (1 << 23); What is this chicken bit ? Is it tasty ? > + WREG32(CP_CPF_DEBUG, tmp); > + > + /* init the pipes */ > + for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { > + int me = (i < 4) ? 1 : 2; > + int pipe = (i < 4) ? i : (i - 4); > + > + eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); > + > + cik_srbm_select(rdev, me, pipe, 0, 0); > + > + /* write the EOP addr */ > + WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); > + WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); > + > + /* set the VMID assigned */ > + WREG32(CP_HPD_EOP_VMID, 0); > + > + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > + tmp = RREG32(CP_HPD_EOP_CONTROL); > + tmp &= ~EOP_SIZE_MASK; > + tmp |= drm_order(MEC_HPD_SIZE / 8); > + WREG32(CP_HPD_EOP_CONTROL, tmp); > + } > + cik_srbm_select(rdev, 0, 0, 0, 0); > + > + /* init the queues. Just two for now. 
*/ > + for (i = 0; i < 2; i++) { > + if (i == 0) > + idx = CAYMAN_RING_TYPE_CP1_INDEX; > + else > + idx = CAYMAN_RING_TYPE_CP2_INDEX; > + > + if (rdev->ring[idx].mqd_obj == NULL) { > + r = radeon_bo_create(rdev, > + sizeof(struct bonaire_mqd), > + PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_GTT, NULL, > + &rdev->ring[idx].mqd_obj); > + if (r) { > + dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); > + return r; > + } > + } > + > + r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); > + if (unlikely(r != 0)) { > + cik_cp_compute_fini(rdev); > + return r; > + } > + r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, > + &mqd_gpu_addr); > + if (r) { > + dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); > + cik_cp_compute_fini(rdev); > + return r; > + } > + r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); > + if (r) { > + dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); > + cik_cp_compute_fini(rdev); > + return r; > + } > + > + /* doorbell offset */ > + rdev->ring[idx].doorbell_offset = > + (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; > + > + /* init the mqd struct */ > + memset(buf, 0, sizeof(struct bonaire_mqd)); > + > + mqd = (struct bonaire_mqd *)buf; > + mqd->header = 0xC0310800; > + mqd->static_thread_mgmt01[0] = 0xffffffff; > + mqd->static_thread_mgmt01[1] = 0xffffffff; > + mqd->static_thread_mgmt23[0] = 0xffffffff; > + mqd->static_thread_mgmt23[1] = 0xffffffff; > + > + cik_srbm_select(rdev, rdev->ring[idx].me, > + rdev->ring[idx].pipe, > + rdev->ring[idx].queue, 0); > + > + /* disable wptr polling */ > + tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); > + tmp &= ~WPTR_POLL_EN; > + WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); > + > + /* enable doorbell? 
*/ > + mqd->queue_state.cp_hqd_pq_doorbell_control = > + RREG32(CP_HQD_PQ_DOORBELL_CONTROL); > + if (use_doorbell) > + mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; > + else > + mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN; > + WREG32(CP_HQD_PQ_DOORBELL_CONTROL, > + mqd->queue_state.cp_hqd_pq_doorbell_control); > + > + /* disable the queue if it's active */ > + mqd->queue_state.cp_hqd_dequeue_request = 0; > + mqd->queue_state.cp_hqd_pq_rptr = 0; > + mqd->queue_state.cp_hqd_pq_wptr= 0; > + if (RREG32(CP_HQD_ACTIVE) & 1) { > + WREG32(CP_HQD_DEQUEUE_REQUEST, 1); > + for (i = 0; i < rdev->usec_timeout; i++) { > + if (!(RREG32(CP_HQD_ACTIVE) & 1)) > + break; > + udelay(1); > + } > + WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); > + WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); > + WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); > + } > + > + /* set the pointer to the MQD */ > + mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; > + mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); > + WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); > + WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); > + /* set MQD vmid to 0 */ > + mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL); > + mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK; > + WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); > + > + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > + hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8; > + mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; > + mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); > + WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); > + WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); > + > + /* set up the HQD, this is similar to CP_RB0_CNTL */ > + mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL); > + mqd->queue_state.cp_hqd_pq_control &= > + 
~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK); > + > + mqd->queue_state.cp_hqd_pq_control |= > + drm_order(rdev->ring[idx].ring_size / 8); > + mqd->queue_state.cp_hqd_pq_control |= > + (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8); > +#ifdef __BIG_ENDIAN > + mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT; > +#endif > + mqd->queue_state.cp_hqd_pq_control &= > + ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE); > + mqd->queue_state.cp_hqd_pq_control |= > + PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */ > + WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); > + > + /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */ > + if (i == 0) > + wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET; > + else > + wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET; > + mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; > + mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; > + WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); > + WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI, > + mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); > + > + /* set the wb address wether it's enabled or not */ > + if (i == 0) > + wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET; > + else > + wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET; > + mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; > + mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = > + upper_32_bits(wb_gpu_addr) & 0xffff; > + WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, > + mqd->queue_state.cp_hqd_pq_rptr_report_addr); > + WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, > + mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); > + > + /* enable the doorbell if requested */ > + if (use_doorbell) { > + mqd->queue_state.cp_hqd_pq_doorbell_control = > + RREG32(CP_HQD_PQ_DOORBELL_CONTROL); > + mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; > + mqd->queue_state.cp_hqd_pq_doorbell_control |= > + 
DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); > + mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; > + mqd->queue_state.cp_hqd_pq_doorbell_control &= > + ~(DOORBELL_SOURCE | DOORBELL_HIT); > + > + } else { > + mqd->queue_state.cp_hqd_pq_doorbell_control = 0; > + } > + WREG32(CP_HQD_PQ_DOORBELL_CONTROL, > + mqd->queue_state.cp_hqd_pq_doorbell_control); > + > + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > + rdev->ring[idx].wptr = 0; > + mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; > + WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); > + rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); > + mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; > + > + /* set the vmid for the queue */ > + mqd->queue_state.cp_hqd_vmid = 0; > + WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); > + > + /* activate the queue */ > + mqd->queue_state.cp_hqd_active = 1; > + WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); > + > + cik_srbm_select(rdev, 0, 0, 0, 0); > + > + radeon_bo_kunmap(rdev->ring[idx].mqd_obj); > + radeon_bo_unreserve(rdev->ring[idx].mqd_obj); > + > + rdev->ring[idx].ready = true; > + r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); > + if (r) > + rdev->ring[idx].ready = false; > + } > + > return 0; > } > > -/* XXX temporary wrappers to handle both compute and gfx */ > -/* XXX */ > static void cik_cp_enable(struct radeon_device *rdev, bool enable) > { > cik_cp_gfx_enable(rdev, enable); > cik_cp_compute_enable(rdev, enable); > } > > -/* XXX */ > static int cik_cp_load_microcode(struct radeon_device *rdev) > { > int r; > @@ -2237,14 +2668,12 @@ static int cik_cp_load_microcode(struct radeon_device *rdev) > return 0; > } > > -/* XXX */ > static void cik_cp_fini(struct radeon_device *rdev) > { > cik_cp_gfx_fini(rdev); > cik_cp_compute_fini(rdev); > } > > -/* XXX */ > static int cik_cp_resume(struct radeon_device *rdev) > { > int r; > @@ -2865,6 +3294,22 @@ static void cik_print_gpu_status_regs(struct radeon_device 
*rdev) > RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); > dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", > RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); > + dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT)); > + dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n", > + RREG32(CP_STALLED_STAT1)); > + dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n", > + RREG32(CP_STALLED_STAT2)); > + dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n", > + RREG32(CP_STALLED_STAT3)); > + dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", > + RREG32(CP_CPF_BUSY_STAT)); > + dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", > + RREG32(CP_CPF_STALLED_STAT1)); > + dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS)); > + dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT)); > + dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", > + RREG32(CP_CPC_STALLED_STAT1)); > + dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS)); > } > > /** > @@ -4952,12 +5397,31 @@ static int cik_startup(struct radeon_device *rdev) > if (r) > return r; > > + /* allocate rlc buffers */ Nitpick: this comment should say "init MEC", not "allocate rlc buffers" -- the code below initializes the MEC.
> + r = cik_mec_init(rdev); > + if (r) { > + DRM_ERROR("Failed to init MEC BOs!\n"); > + return r; > + } > + > r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); > if (r) { > dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); > return r; > } > > + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); > + if (r) { > + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); > + return r; > + } > + > + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); > + if (r) { > + dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); > + return r; > + } > + > r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); > if (r) { > dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); > @@ -5002,6 +5466,36 @@ static int cik_startup(struct radeon_device *rdev) > if (r) > return r; > > + /* set up the compute queues */ > + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; > + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, > + CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (r) > + return r; > + ring->me = 1; /* first MEC */ > + ring->pipe = 0; /* first pipe */ > + ring->queue = 0; /* first queue */ > + ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; > + ring->funcs.get_rptr = &cik_compute_ring_get_rptr; > + ring->funcs.get_wptr = &cik_compute_ring_get_wptr; > + ring->funcs.set_wptr = &cik_compute_ring_set_wptr; > + > + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; > + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, > + CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, > + 0, 0xffffffff, RADEON_CP_PACKET2); > + if (r) > + return r; > + /* dGPU only have 1 MEC */ > + ring->me = 1; /* first MEC */ > + ring->pipe = 0; /* first pipe */ > + ring->queue = 1; /* second queue */ > + ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; > + ring->funcs.get_rptr = &cik_compute_ring_get_rptr; > + ring->funcs.get_wptr = 
&cik_compute_ring_get_wptr; > + ring->funcs.set_wptr = &cik_compute_ring_set_wptr; > + > ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; > r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, > SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, > @@ -5176,6 +5670,20 @@ int cik_init(struct radeon_device *rdev) > ring->ring_obj = NULL; > r600_ring_init(rdev, ring, 1024 * 1024); > > + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; > + ring->ring_obj = NULL; > + r600_ring_init(rdev, ring, 1024 * 1024); > + r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); > + if (r) > + return r; > + > + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; > + ring->ring_obj = NULL; > + r600_ring_init(rdev, ring, 1024 * 1024); > + r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); > + if (r) > + return r; > + > ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; > ring->ring_obj = NULL; > r600_ring_init(rdev, ring, 256 * 1024); > @@ -5206,6 +5714,7 @@ int cik_init(struct radeon_device *rdev) > cik_sdma_fini(rdev); > cik_irq_fini(rdev); > si_rlc_fini(rdev); > + cik_mec_fini(rdev); > radeon_wb_fini(rdev); > radeon_ib_pool_fini(rdev); > radeon_vm_manager_fini(rdev); > @@ -5241,6 +5750,7 @@ void cik_fini(struct radeon_device *rdev) > cik_sdma_fini(rdev); > cik_irq_fini(rdev); > si_rlc_fini(rdev); > + cik_mec_fini(rdev); > radeon_wb_fini(rdev); > radeon_vm_manager_fini(rdev); > radeon_ib_pool_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h > index 79be39e..63514b9 100644 > --- a/drivers/gpu/drm/radeon/cikd.h > +++ b/drivers/gpu/drm/radeon/cikd.h > @@ -460,6 +460,13 @@ > # define RDERR_INT_ENABLE (1 << 0) > # define GUI_IDLE_INT_ENABLE (1 << 19) > > +#define CP_CPC_STATUS 0x8210 > +#define CP_CPC_BUSY_STAT 0x8214 > +#define CP_CPC_STALLED_STAT1 0x8218 > +#define CP_CPF_STATUS 0x821c > +#define CP_CPF_BUSY_STAT 0x8220 > +#define CP_CPF_STALLED_STAT1 0x8224 > + > #define CP_MEC_CNTL 0x8234 > #define MEC_ME2_HALT (1 << 28) > #define 
MEC_ME1_HALT (1 << 30) > @@ -468,6 +475,12 @@ > #define MEC_ME2_HALT (1 << 28) > #define MEC_ME1_HALT (1 << 30) > > +#define CP_STALLED_STAT3 0x8670 > +#define CP_STALLED_STAT1 0x8674 > +#define CP_STALLED_STAT2 0x8678 > + > +#define CP_STAT 0x8680 > + > #define CP_ME_CNTL 0x86D8 > #define CP_CE_HALT (1 << 24) > #define CP_PFP_HALT (1 << 26) > @@ -701,6 +714,11 @@ > # define CP_RINGID1_INT_STAT (1 << 30) > # define CP_RINGID0_INT_STAT (1 << 31) > > +#define CP_CPF_DEBUG 0xC200 > + > +#define CP_PQ_WPTR_POLL_CNTL 0xC20C > +#define WPTR_POLL_EN (1 << 31) > + > #define CP_ME1_PIPE0_INT_CNTL 0xC214 > #define CP_ME1_PIPE1_INT_CNTL 0xC218 > #define CP_ME1_PIPE2_INT_CNTL 0xC21C > @@ -773,6 +791,50 @@ > #define RLC_GPM_SCRATCH_ADDR 0xC4B0 > #define RLC_GPM_SCRATCH_DATA 0xC4B4 > > +#define CP_HPD_EOP_BASE_ADDR 0xC904 > +#define CP_HPD_EOP_BASE_ADDR_HI 0xC908 > +#define CP_HPD_EOP_VMID 0xC90C > +#define CP_HPD_EOP_CONTROL 0xC910 > +#define EOP_SIZE(x) ((x) << 0) > +#define EOP_SIZE_MASK (0x3f << 0) > +#define CP_MQD_BASE_ADDR 0xC914 > +#define CP_MQD_BASE_ADDR_HI 0xC918 > +#define CP_HQD_ACTIVE 0xC91C > +#define CP_HQD_VMID 0xC920 > + > +#define CP_HQD_PQ_BASE 0xC934 > +#define CP_HQD_PQ_BASE_HI 0xC938 > +#define CP_HQD_PQ_RPTR 0xC93C > +#define CP_HQD_PQ_RPTR_REPORT_ADDR 0xC940 > +#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI 0xC944 > +#define CP_HQD_PQ_WPTR_POLL_ADDR 0xC948 > +#define CP_HQD_PQ_WPTR_POLL_ADDR_HI 0xC94C > +#define CP_HQD_PQ_DOORBELL_CONTROL 0xC950 > +#define DOORBELL_OFFSET(x) ((x) << 2) > +#define DOORBELL_OFFSET_MASK (0x1fffff << 2) > +#define DOORBELL_SOURCE (1 << 28) > +#define DOORBELL_SCHD_HIT (1 << 29) > +#define DOORBELL_EN (1 << 30) > +#define DOORBELL_HIT (1 << 31) > +#define CP_HQD_PQ_WPTR 0xC954 > +#define CP_HQD_PQ_CONTROL 0xC958 > +#define QUEUE_SIZE(x) ((x) << 0) > +#define QUEUE_SIZE_MASK (0x3f << 0) > +#define RPTR_BLOCK_SIZE(x) ((x) << 8) > +#define RPTR_BLOCK_SIZE_MASK (0x3f << 8) > +#define PQ_VOLATILE (1 << 26) > +#define NO_UPDATE_RPTR (1 << 
27) > +#define UNORD_DISPATCH (1 << 28) > +#define ROQ_PQ_IB_FLIP (1 << 29) > +#define PRIV_STATE (1 << 30) > +#define KMD_QUEUE (1 << 31) > + > +#define CP_HQD_DEQUEUE_REQUEST 0xC974 > + > +#define CP_MQD_CONTROL 0xC99C > +#define MQD_VMID(x) ((x) << 0) > +#define MQD_VMID_MASK (0xf << 0) > + > #define PA_SC_RASTER_CONFIG 0x28350 > # define RASTER_CONFIG_RB_MAP_0 0 > # define RASTER_CONFIG_RB_MAP_1 1 > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index a2a3430..d40d506 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -714,6 +714,22 @@ struct radeon_ring { > u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); > void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); > } funcs; > + /* for CIK queues */ > + u32 me; > + u32 pipe; > + u32 queue; > + struct radeon_bo *mqd_obj; > + u32 doorbell_page_num; > + u32 doorbell_offset; > + unsigned wptr_offs; > +}; > + > +struct radeon_mec { > + struct radeon_bo *hpd_eop_obj; > + u64 hpd_eop_gpu_addr; > + u32 num_pipe; > + u32 num_mec; > + u32 num_queue; > }; > > /* > @@ -971,6 +987,8 @@ struct radeon_wb { > #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 > #define R600_WB_UVD_RPTR_OFFSET 2560 > #define R600_WB_EVENT_OFFSET 3072 > +#define CIK_WB_CP1_WPTR_OFFSET 3328 > +#define CIK_WB_CP2_WPTR_OFFSET 3584 > > /** > * struct radeon_pm - power management datas > @@ -1760,6 +1778,7 @@ struct radeon_device { > int msi_enabled; /* msi enabled */ > struct r600_ih ih; /* r6/700 interrupt ring */ > struct si_rlc rlc; > + struct radeon_mec mec; > struct work_struct hotplug_work; > struct work_struct audio_work; > struct work_struct reset_work; > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index cf71734..7e265a5 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -121,9 +121,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, 
s32 priority > p->ring = RADEON_RING_TYPE_GFX_INDEX; > break; > case RADEON_CS_RING_COMPUTE: > - if (p->rdev->family >= CHIP_BONAIRE) > - p->ring = RADEON_RING_TYPE_GFX_INDEX; > - else if (p->rdev->family >= CHIP_TAHITI) { > + if (p->rdev->family >= CHIP_TAHITI) { > if (p->priority > 0) > p->ring = CAYMAN_RING_TYPE_CP1_INDEX; > else > -- > 1.7.7.5 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel