On 2017-04-17 06:00 PM, Alex Deucher wrote: > KIQ is the Kernel Interface Queue for managing the MEC. Rather than setting > up rings via direct MMIO of ring registers, the rings are configured via > special packets sent to the KIQ. The allows the MEC to better manage shared > resources and certain power events. It also reduces the code paths in the > driver to support and is required for MEC powergating. > > v2: drop gfx_v9_0_cp_compute_fini() as well > v3: rebase on latest changes derived from gfx8, add unmap queues on > hw_fini > > Signed-off-by: Alex Deucher <alexander.deucher at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 374 ++++++++-------------------------- > 1 file changed, 81 insertions(+), 293 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index f86a7f6..ef7a3ee 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -1113,23 +1113,21 @@ static int gfx_v9_0_sw_init(void *handle) > return r; > } > > - if (amdgpu_sriov_vf(adev)) { > - r = gfx_v9_0_kiq_init(adev); > - if (r) { > - DRM_ERROR("Failed to init KIQ BOs!\n"); > - return r; > - } > + r = gfx_v9_0_kiq_init(adev); > + if (r) { > + DRM_ERROR("Failed to init KIQ BOs!\n"); > + return r; > + } > > - kiq = &adev->gfx.kiq; > - r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); > - if (r) > - return r; > + kiq = &adev->gfx.kiq; > + r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); > + if (r) > + return r; > > - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ > - r = gfx_v9_0_compute_mqd_sw_init(adev); > - if (r) > - return r; > - } > + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ s/wel/well Typo was there before, but a good chance to fix it. 
> + r = gfx_v9_0_compute_mqd_sw_init(adev); > + if (r) > + return r; > > /* reserve GDS, GWS and OA resource for gfx */ > r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, > @@ -1176,11 +1174,9 @@ static int gfx_v9_0_sw_fini(void *handle) > for (i = 0; i < adev->gfx.num_compute_rings; i++) > amdgpu_ring_fini(&adev->gfx.compute_ring[i]); > > - if (amdgpu_sriov_vf(adev)) { > - gfx_v9_0_compute_mqd_sw_fini(adev); > - gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); > - gfx_v9_0_kiq_fini(adev); > - } > + gfx_v9_0_compute_mqd_sw_fini(adev); > + gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); > + gfx_v9_0_kiq_fini(adev); > > gfx_v9_0_mec_fini(adev); > gfx_v9_0_ngg_fini(adev); > @@ -1736,13 +1732,6 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) > udelay(50); > } > > -static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev) > -{ > - gfx_v9_0_cp_compute_enable(adev, true); > - > - return 0; > -} > - > static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) > { > const struct gfx_firmware_header_v1_0 *mec_hdr; > @@ -1785,45 +1774,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) > return 0; > } > > -static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) > -{ > - int i, r; > - > - for (i = 0; i < adev->gfx.num_compute_rings; i++) { > - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; > - > - if (ring->mqd_obj) { > - r = amdgpu_bo_reserve(ring->mqd_obj, false); > - if (unlikely(r != 0)) > - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); > - > - amdgpu_bo_unpin(ring->mqd_obj); > - amdgpu_bo_unreserve(ring->mqd_obj); > - > - amdgpu_bo_unref(&ring->mqd_obj); > - ring->mqd_obj = NULL; > - } > - } > -} > - > -static int gfx_v9_0_init_queue(struct amdgpu_ring *ring); > - > -static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev) > -{ > - int i, r; > - for (i = 0; i < adev->gfx.num_compute_rings; i++) { > - 
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; > - if (gfx_v9_0_init_queue(ring)) > - dev_warn(adev->dev, "compute queue %d init failed!\n", i); > - } > - > - r = gfx_v9_0_cp_compute_start(adev); > - if (r) > - return r; > - > - return 0; > -} > - > /* KIQ functions */ > static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) > { > @@ -1914,6 +1864,56 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) > return r; > } > > +static int gfx_v9_0_kiq_kcq_disable(struct amdgpu_device *adev) > +{ > + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; > + uint32_t scratch, tmp = 0; > + int r, i; > + > + r = amdgpu_gfx_scratch_get(adev, &scratch); > + if (r) { > + DRM_ERROR("Failed to get scratch reg (%d).\n", r); > + return r; > + } > + WREG32(scratch, 0xCAFEDEAD); WREG32_SOC15 needed here I think > + > + r = amdgpu_ring_alloc(kiq_ring, 6 + 3); > + if (r) { > + DRM_ERROR("Failed to lock KIQ (%d).\n", r); > + amdgpu_gfx_scratch_free(adev, scratch); > + return r; > + } > + /* unmap queues */ > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); > + amdgpu_ring_write(kiq_ring, > + PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */ > + PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */ > + amdgpu_ring_write(kiq_ring, 0); > + amdgpu_ring_write(kiq_ring, 0); > + amdgpu_ring_write(kiq_ring, 0); > + amdgpu_ring_write(kiq_ring, 0); > + /* write to scratch for completion */ > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); > + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); > + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); > + amdgpu_ring_commit(kiq_ring); > + > + for (i = 0; i < adev->usec_timeout; i++) { > + tmp = RREG32(scratch); > + if (tmp == 0xDEADBEEF) > + break; > + DRM_UDELAY(1); > + } > + if (i >= adev->usec_timeout) { > + DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", > + scratch, tmp); > + r = -EINVAL; > + } > + amdgpu_gfx_scratch_free(adev, scratch); > + > + return r; > +} 
> + > static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > @@ -2194,7 +2194,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) > struct v9_mqd *mqd = ring->mqd_ptr; > int mqd_idx = ring - &adev->gfx.compute_ring[0]; > > - if (!adev->gfx.in_reset) { > + if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { > memset((void *)mqd, 0, sizeof(*mqd)); > mutex_lock(&adev->srbm_mutex); > soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > @@ -2285,10 +2285,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) > if (r) > return r; > > - if (amdgpu_sriov_vf(adev)) > - r = gfx_v9_0_kiq_resume(adev); > - else > - r = gfx_v9_0_cp_compute_resume(adev); > + r = gfx_v9_0_kiq_resume(adev); > if (r) > return r; > > @@ -2298,6 +2295,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) > ring->ready = false; > return r; > } > + > + ring = &adev->gfx.kiq.ring; > + ring->ready = true; > + r = amdgpu_ring_test_ring(ring); > + if (r) > + ring->ready = false; > + > for (i = 0; i < adev->gfx.num_compute_rings; i++) { > ring = &adev->gfx.compute_ring[i]; > > @@ -2307,14 +2311,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) > ring->ready = false; > } > > - if (amdgpu_sriov_vf(adev)) { > - ring = &adev->gfx.kiq.ring; > - ring->ready = true; > - r = amdgpu_ring_test_ring(ring); > - if (r) > - ring->ready = false; > - } > - > gfx_v9_0_enable_gui_idle_interrupt(adev, true); > > return 0; > @@ -2360,9 +2356,9 @@ static int gfx_v9_0_hw_fini(void *handle) > pr_debug("For SRIOV client, shouldn't do anything.\n"); > return 0; > } > + gfx_v9_0_kiq_kcq_disable(adev); > gfx_v9_0_cp_enable(adev, false); > gfx_v9_0_rlc_stop(adev); > - gfx_v9_0_cp_compute_fini(adev); > > return 0; > } > @@ -2371,14 +2367,18 @@ static int gfx_v9_0_suspend(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > + adev->gfx.in_suspend = true; > return gfx_v9_0_hw_fini(adev); > } > > static 
int gfx_v9_0_resume(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > + int r; > > - return gfx_v9_0_hw_init(adev); > + r = gfx_v9_0_hw_init(adev); > + adev->gfx.in_suspend = false; > + return r; > } > > static bool gfx_v9_0_is_idle(void *handle) > @@ -3753,218 +3753,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, > return 0; > } > > -static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) > -{ > - int r, j; > - u32 tmp; > - bool use_doorbell = true; > - u64 hqd_gpu_addr; > - u64 mqd_gpu_addr; > - u64 eop_gpu_addr; > - u64 wb_gpu_addr; > - u32 *buf; > - struct v9_mqd *mqd; > - struct amdgpu_device *adev; > - > - adev = ring->adev; > - if (ring->mqd_obj == NULL) { > - r = amdgpu_bo_create(adev, > - sizeof(struct v9_mqd), > - PAGE_SIZE,true, > - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, > - NULL, &ring->mqd_obj); > - if (r) { > - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); > - return r; > - } > - } > - > - r = amdgpu_bo_reserve(ring->mqd_obj, false); > - if (unlikely(r != 0)) { > - gfx_v9_0_cp_compute_fini(adev); > - return r; > - } > - > - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, > - &mqd_gpu_addr); > - if (r) { > - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); > - gfx_v9_0_cp_compute_fini(adev); > - return r; > - } > - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); > - if (r) { > - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); > - gfx_v9_0_cp_compute_fini(adev); > - return r; > - } > - > - /* init the mqd struct */ > - memset(buf, 0, sizeof(struct v9_mqd)); > - > - mqd = (struct v9_mqd *)buf; > - mqd->header = 0xC0310800; > - mqd->compute_pipelinestat_enable = 0x00000001; > - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; > - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; > - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; > - mqd->compute_misc_reserved = 0x00000003; > - mutex_lock(&adev->srbm_mutex); > - 
soc15_grbm_select(adev, ring->me, > - ring->pipe, > - ring->queue, 0); > - /* disable wptr polling */ > - WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); > - > - /* write the EOP addr */ > - BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */ > - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); > - eop_gpu_addr >>= 8; > - > - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr)); > - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); > - mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); > - mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); > - > - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, > - (order_base_2(MEC_HPD_SIZE / 4) - 1)); > - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp); > - > - /* enable doorbell? */ > - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); > - if (use_doorbell) > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); > - else > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); > - > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); > - mqd->cp_hqd_pq_doorbell_control = tmp; > - > - /* disable the queue if it's active */ > - ring->wptr = 0; > - mqd->cp_hqd_dequeue_request = 0; > - mqd->cp_hqd_pq_rptr = 0; > - mqd->cp_hqd_pq_wptr_lo = 0; > - mqd->cp_hqd_pq_wptr_hi = 0; > - if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { > - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); > - for (j = 0; j < adev->usec_timeout; j++) { > - if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) > - break; > - udelay(1); > - } > - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); > - 
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); > - } > - > - /* set the pointer to the MQD */ > - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; > - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); > - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); > - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); > - > - /* set MQD vmid to 0 */ > - tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); > - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp); > - mqd->cp_mqd_control = tmp; > - > - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > - hqd_gpu_addr = ring->gpu_addr >> 8; > - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; > - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); > - > - /* set up the HQD, this is similar to CP_RB0_CNTL */ > - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, > - (order_base_2(ring->ring_size / 4) - 1)); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, > - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); > -#ifdef __BIG_ENDIAN > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); > -#endif > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp); > - mqd->cp_hqd_pq_control = tmp; > - > - /* set the wb address wether it's enabled or not */ > - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); > - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; > - mqd->cp_hqd_pq_rptr_report_addr_hi = > - upper_32_bits(wb_gpu_addr) & 
0xffff; > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, > - mqd->cp_hqd_pq_rptr_report_addr_lo); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, > - mqd->cp_hqd_pq_rptr_report_addr_hi); > - > - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ > - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); > - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; > - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, > - mqd->cp_hqd_pq_wptr_poll_addr_lo); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, > - mqd->cp_hqd_pq_wptr_poll_addr_hi); > - > - /* enable the doorbell if requested */ > - if (use_doorbell) { > - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, > - (AMDGPU_DOORBELL64_KIQ * 2) << 2); > - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, > - (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); > - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > - DOORBELL_OFFSET, ring->doorbell_index); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); > - mqd->cp_hqd_pq_doorbell_control = tmp; > - > - } else { > - mqd->cp_hqd_pq_doorbell_control = 0; > - } > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, > - mqd->cp_hqd_pq_doorbell_control); > - > - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); > - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); > - > - /* set the vmid for the queue */ > - mqd->cp_hqd_vmid = 0; > - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); > - > - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); > - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); > 
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp); > - mqd->cp_hqd_persistent_state = tmp; > - > - /* activate the queue */ > - mqd->cp_hqd_active = 1; > - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); > - > - soc15_grbm_select(adev, 0, 0, 0, 0); > - mutex_unlock(&adev->srbm_mutex); > - > - amdgpu_bo_kunmap(ring->mqd_obj); > - amdgpu_bo_unreserve(ring->mqd_obj); > - > - if (use_doorbell) > - WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); > - > - return 0; > -} > - > const struct amdgpu_ip_block_version gfx_v9_0_ip_block = > { > .type = AMD_IP_BLOCK_TYPE_GFX, >