On Fri, Dec 23, 2016 at 2:08 AM, Xiangliang Yu <Xiangliang.Yu at amd.com> wrote: > KIQ is queue-memory based initialization method: setup KIQ queue > firstly, then send command to KIQ to setup other queues, without > accessing registers. > > For virtualization, need KIQ to access virtual function registers > when running on guest mode. > > changes in v2: > 1. use amdgpu_bo_create/free_kernel to allocate OB. > > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > Signed-off-by: Xiangliang Yu <Xiangliang.Yu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 + > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 607 +++++++++++++++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/vid.h | 2 + > 3 files changed, 620 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 9ea5bf0..c392882 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -186,6 +186,11 @@ enum amdgpu_thermal_irq { > AMDGPU_THERMAL_IRQ_LAST > }; > > +enum amdgpu_kiq_irq { > + AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, > + AMDGPU_CP_KIQ_IRQ_LAST > +}; Is there a reason to make this a separate enum rather than just adding it to amdgpu_cp_irq? > + > int amdgpu_set_clockgating_state(struct amdgpu_device *adev, > enum amd_ip_block_type block_type, > enum amd_clockgating_state state); > @@ -778,6 +783,13 @@ struct amdgpu_mec { > u32 num_queue; > }; > > +struct amdgpu_kiq { > + u64 eop_gpu_addr; > + struct amdgpu_bo *eop_obj; > + struct amdgpu_ring ring; > + struct amdgpu_irq_src irq; > +}; > + > /* > * GPU scratch registers structures, functions & helpers > */ > @@ -853,6 +865,7 @@ struct amdgpu_gfx { > struct amdgpu_gca_config config; > struct amdgpu_rlc rlc; > struct amdgpu_mec mec; > + struct amdgpu_kiq kiq; > struct amdgpu_scratch scratch; > const struct firmware *me_fw; /* ME firmware */ > uint32_t me_fw_version; > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index c3e6bfc..a233eeb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -1367,6 +1367,42 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) > } > } > > +static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, > + struct amdgpu_ring *ring, > + struct amdgpu_irq_src *irq) > +{ > + int r = 0; > + > + ring->adev = NULL; > + ring->ring_obj = NULL; > + ring->use_doorbell = true; > + ring->doorbell_index = AMDGPU_DOORBELL_KIQ; > + if (adev->gfx.mec2_fw) { > + ring->me = 2; > + ring->pipe = 0; > + } else { > + ring->me = 1; > + ring->pipe = 1; > + } > + > + irq->data = ring; > + ring->queue = 0; > + sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); > + r = amdgpu_ring_init(adev, ring, 1024, > + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); > + if (r) > + dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); > + > + return r; > +} > + > +static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, > + struct amdgpu_irq_src *irq) > +{ > + amdgpu_ring_fini(ring); > + irq->data = NULL; > +} > + > #define MEC_HPD_SIZE 2048 > > static int gfx_v8_0_mec_init(struct amdgpu_device *adev) > @@ -1421,6 +1457,35 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) > return 0; > } > > +static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) > +{ > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); > + kiq->eop_obj = NULL; > +} > + > +static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) > +{ > + int r; > + u32 *hpd; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + > + r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, > + AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, > + &kiq->eop_gpu_addr, (void **)&hpd); > + if (r) { > + dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); > + return r; > + } > + > + memset(hpd, 0, MEC_HPD_SIZE); > + > + amdgpu_bo_kunmap(kiq->eop_obj); > + > + return 0; > +} > + > static const u32 vgpr_init_compute_shader[] = > { > 0x7e000209, 0x7e020208, > @@ -1997,8 +2062,14 @@ static int gfx_v8_0_sw_init(void *handle) > { > int i, r; > struct amdgpu_ring *ring; > + struct amdgpu_kiq *kiq; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > + /* KIQ event */ > + r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq); > + if (r) > + return r; > + > /* EOP Event */ > r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); > if (r) > @@ -2036,6 +2107,17 @@ static int gfx_v8_0_sw_init(void *handle) > return r; > } > > + r = gfx_v8_0_kiq_init(adev); > + if (r) { > + DRM_ERROR("Failed to init KIQ BOs!\n"); > + return r; > + } > + > + kiq = &adev->gfx.kiq; > + r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); > + if (r) > + return r; > + > /* set up the gfx ring */ > for (i = 0; i < adev->gfx.num_gfx_rings; i++) { > ring = &adev->gfx.gfx_ring[i]; > @@ -2119,7 +2201,9 @@ static int gfx_v8_0_sw_fini(void *handle) > amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); > for (i = 0; i < adev->gfx.num_compute_rings; i++) > amdgpu_ring_fini(&adev->gfx.compute_ring[i]); > + gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); > > + gfx_v8_0_kiq_fini(adev); > gfx_v8_0_mec_fini(adev); > gfx_v8_0_rlc_fini(adev); > gfx_v8_0_free_microcode(adev); > @@ -4506,6 +4590,393 @@ static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) > } > } > > +/* KIQ functions */ > +static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) > +{ > + uint32_t tmp; > + struct amdgpu_device *adev = ring->adev; > + > + /* tell RLC which is KIQ queue */ > + tmp = RREG32(mmRLC_CP_SCHEDULERS); > + tmp &= 0xffffff00; > + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); > + WREG32(mmRLC_CP_SCHEDULERS, tmp); > + tmp |= 0x80; > + WREG32(mmRLC_CP_SCHEDULERS, tmp); > +} > + > +static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) > +{ > + amdgpu_ring_alloc(ring, 8); > + /* set resources */ > + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); > + amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ > + amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ > + amdgpu_ring_write(ring, 0); /* queue mask hi */ > + amdgpu_ring_write(ring, 0); /* gws mask lo */ > + amdgpu_ring_write(ring, 0); /* gws mask hi */ > + amdgpu_ring_write(ring, 0); /* oac mask */ > + amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ > + amdgpu_ring_commit(ring); > + udelay(50); > +} > + > +static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, > + struct amdgpu_ring *ring) > +{ > + struct amdgpu_device *adev = kiq_ring->adev; > + uint64_t mqd_addr, wptr_addr; > + > + mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); > + wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); > + amdgpu_ring_alloc(kiq_ring, 8); > + > + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); > + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ > + amdgpu_ring_write(kiq_ring, 0x21010000); > + amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | > + (ring->queue << 26) | > + (ring->pipe << 29) | > + ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */ > + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); > + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); > + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); > + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); > + amdgpu_ring_commit(kiq_ring); > + udelay(50); > +} > + > +static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, > + struct vi_mqd *mqd, > + uint64_t mqd_gpu_addr, > + uint64_t eop_gpu_addr, > + struct amdgpu_ring *ring) > +{ > + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; > + uint32_t tmp; > + > + mqd->header = 0xC0310800; > + mqd->compute_pipelinestat_enable = 0x00000001; > + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; > + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; > + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; > + mqd->compute_misc_reserved = 0x00000003; > + > + eop_base_addr = eop_gpu_addr >> 8; > + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; > + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); > + > + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > + tmp = RREG32(mmCP_HQD_EOP_CONTROL); > + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, > + (order_base_2(MEC_HPD_SIZE / 4) - 1)); > + > + mqd->cp_hqd_eop_control = tmp; > + > + /* enable doorbell? */ > + tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > + > + if (ring->use_doorbell) > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > + DOORBELL_EN, 1); > + else > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > + DOORBELL_EN, 0); > + > + mqd->cp_hqd_pq_doorbell_control = tmp; > + > + /* disable the queue if it's active */ > + mqd->cp_hqd_dequeue_request = 0; > + mqd->cp_hqd_pq_rptr = 0; > + mqd->cp_hqd_pq_wptr = 0; > + > + /* set the pointer to the MQD */ > + mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; > + mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); > + > + /* set MQD vmid to 0 */ > + tmp = RREG32(mmCP_MQD_CONTROL); > + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); > + mqd->cp_mqd_control = tmp; > + > + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > + hqd_gpu_addr = ring->gpu_addr >> 8; > + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; > + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); > + > + /* set up the HQD, this is similar to CP_RB0_CNTL */ > + tmp = RREG32(mmCP_HQD_PQ_CONTROL); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, > + (order_base_2(ring->ring_size / 4) - 1)); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, > + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); > +#ifdef __BIG_ENDIAN > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); > +#endif > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); > + mqd->cp_hqd_pq_control = tmp; > + > + /* set the wb address whether it's enabled or not */ > + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); > + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; > + mqd->cp_hqd_pq_rptr_report_addr_hi = > + upper_32_bits(wb_gpu_addr) & 0xffff; > + > + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ > + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); > + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; > + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; > + > + tmp = 0; > + /* enable the doorbell if requested */ > + if (ring->use_doorbell) { > + tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > + DOORBELL_OFFSET, ring->doorbell_index); > + > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > + DOORBELL_EN, 1); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > + DOORBELL_SOURCE, 0); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, > + DOORBELL_HIT, 0); > + } > + > + mqd->cp_hqd_pq_doorbell_control = tmp; > + > + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > + ring->wptr = 0; > + mqd->cp_hqd_pq_wptr = ring->wptr; > + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); > + > + /* set the vmid for the queue */ > + mqd->cp_hqd_vmid = 0; > + > + tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); > + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); > + mqd->cp_hqd_persistent_state = tmp; > + > + /* activate the queue */ > + mqd->cp_hqd_active = 1; > + > + return 0; > +} > + > +static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, > + struct vi_mqd *mqd, > + struct amdgpu_ring *ring) > +{ > + uint32_t tmp; > + int j; > + > + /* disable wptr polling */ > + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); > + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); > + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); > + > + WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); > + WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); > + > + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ > + WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); > + > + /* enable doorbell? */ > + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); > + > + /* disable the queue if it's active */ > + if (RREG32(mmCP_HQD_ACTIVE) & 1) { > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); > + for (j = 0; j < adev->usec_timeout; j++) { > + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) > + break; > + udelay(1); > + } > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); > + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); > + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > + } > + > + /* set the pointer to the MQD */ > + WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); > + WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); > + > + /* set MQD vmid to 0 */ > + WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); > + > + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ > + WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); > + WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); > + > + /* set up the HQD, this is similar to CP_RB0_CNTL */ > + WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); > + > + /* set the wb address whether it's enabled or not */ > + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, > + mqd->cp_hqd_pq_rptr_report_addr_lo); > + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, > + mqd->cp_hqd_pq_rptr_report_addr_hi); > + > + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ > + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); > + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); > + > + /* enable the doorbell if requested */ > + if (ring->use_doorbell) { > + if ((adev->asic_type == CHIP_CARRIZO) || > + (adev->asic_type == CHIP_FIJI) || > + (adev->asic_type == CHIP_STONEY)) { > + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, > + AMDGPU_DOORBELL_KIQ << 2); > + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, > + AMDGPU_DOORBELL_MEC_RING7 << 2); > + } > + } > + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); > + > + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > + > + /* set the vmid for the queue */ > + WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); > + > + WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); > + > + /* activate the queue */ > + WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); > + > + if (ring->use_doorbell) { > + tmp = RREG32(mmCP_PQ_STATUS); > + tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); > + WREG32(mmCP_PQ_STATUS, tmp); > + } > + > + return 0; > +} > + > +static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, > + struct vi_mqd *mqd, > + u64 mqd_gpu_addr) > +{ > + struct amdgpu_device *adev = ring->adev; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq; > + uint64_t eop_gpu_addr; > + bool is_kiq = false; > + > + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) > + is_kiq = true; > + > + if (is_kiq) { > + eop_gpu_addr = kiq->eop_gpu_addr; > + gfx_v8_0_kiq_setting(&kiq->ring); > + } else > + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + > + ring->queue * MEC_HPD_SIZE; > + > + mutex_lock(&adev->srbm_mutex); > + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > + > + gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); > + > + if (is_kiq) > + gfx_v8_0_kiq_init_register(adev, mqd, ring); > + > + vi_srbm_select(adev, 0, 0, 0, 0); > + mutex_unlock(&adev->srbm_mutex); > + > + if (is_kiq) > + gfx_v8_0_kiq_enable(ring); > + else > + gfx_v8_0_map_queue_enable(&kiq->ring, ring); > + > + return 0; > +} > + > +static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev) > +{ > + struct amdgpu_ring *ring = NULL; > + int i; > + > + for (i = 0; i < adev->gfx.num_compute_rings; i++) { > + ring = &adev->gfx.compute_ring[i]; > + amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); > + ring->mqd_obj = NULL; > + } > + > + ring = &adev->gfx.kiq.ring; > + amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); > + ring->mqd_obj = NULL; > +} > + > +static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev, > + struct amdgpu_ring *ring) > +{ > + struct vi_mqd *mqd; > + u64 mqd_gpu_addr; > + u32 *buf; > + int r = 0; > + > + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, > + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, > + &mqd_gpu_addr, (void **)&buf); > + if (r) { > + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); > + return r; > + } > + > + /* init the mqd struct */ > + memset(buf, 0, sizeof(struct vi_mqd)); > + mqd = (struct vi_mqd *)buf; > + > + r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr); > + if (r) > + return r; > + > + amdgpu_bo_kunmap(ring->mqd_obj); > + > + return 0; > +} > + > +static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) > +{ > + struct amdgpu_ring *ring = NULL; > + int r, i; > + > + ring = &adev->gfx.kiq.ring; > + r = gfx_v8_0_kiq_setup_queue(adev, ring); > + if (r) > + return r; > + > + for (i = 0; i < adev->gfx.num_compute_rings; i++) { > + ring = &adev->gfx.compute_ring[i]; > + r = gfx_v8_0_kiq_setup_queue(adev, ring); > + if (r) > + return r; > + } > + > + gfx_v8_0_cp_compute_enable(adev, true); > + > + for (i = 0; i < adev->gfx.num_compute_rings; i++) { > + ring = &adev->gfx.compute_ring[i]; > + > + ring->ready = true; > + r = amdgpu_ring_test_ring(ring); > + if (r) > + ring->ready = false; > + } > + > + ring = &adev->gfx.kiq.ring; > + ring->ready = true; > + r = amdgpu_ring_test_ring(ring); > + if (r) > + ring->ready = false; > + > + return 0; > +} > + > static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) > { > int r, i, j; > @@ -4806,7 +5277,10 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) > if (r) > return r; > > - r = gfx_v8_0_cp_compute_resume(adev); > + if (amdgpu_sriov_vf(adev)) > + r = gfx_v8_0_kiq_resume(adev); > + else > + r = gfx_v8_0_cp_compute_resume(adev); Is there any reason why we shouldn't use KIQ for the non-sr-iov case as well? I think KIQ is used to manage the compute rings in other OSes. Doesn't necessarily have to happen now, but we should look into it. > if (r) > return r; > > @@ -4845,6 +5319,7 @@ static int gfx_v8_0_hw_fini(void *handle) > amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); > amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); > if (amdgpu_sriov_vf(adev)) { > + gfx_v8_0_kiq_free_queue(adev); > pr_debug("For SRIOV client, shouldn't do anything.\n"); > return 0; > } > @@ -6011,7 +6486,8 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) > { > u32 ref_and_mask, reg_mem_engine; > > - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { > + if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || > + (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { > switch (ring->me) { > case 1: > ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; > @@ -6224,6 +6700,32 @@ static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, > amdgpu_ring_write(ring, upper_32_bits(seq)); > } > > +static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, > + u64 seq, unsigned int flags) > +{ > + /* we only allocate 32bit for each seq wb address */ > + if (flags & AMDGPU_FENCE_FLAG_64BIT) > + BUG(); > + > + /* write fence seq to the "addr" */ > + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); > + amdgpu_ring_write(ring, lower_32_bits(addr)); > + amdgpu_ring_write(ring, upper_32_bits(addr)); > + amdgpu_ring_write(ring, lower_32_bits(seq)); > + > + if (flags & AMDGPU_FENCE_FLAG_INT) { > + /* set register to trigger INT */ > + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); > + amdgpu_ring_write(ring, mmCPC_INT_STATUS); > + amdgpu_ring_write(ring, 0); > + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ > + } > +} > + > static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) > { > amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); > @@ -6405,6 +6907,72 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, > return 0; > } > > +static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, > + struct amdgpu_irq_src *src, > + unsigned int type, > + enum amdgpu_interrupt_state state) > +{ > + uint32_t tmp, target; > + struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data; > + > + BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); > + > + if (ring->me == 1) > + target = mmCP_ME1_PIPE0_INT_CNTL; > + else > + target = mmCP_ME2_PIPE0_INT_CNTL; > + target += ring->pipe; > + > + switch (type) { > + case AMDGPU_CP_KIQ_IRQ_DRIVER0: > + if (state == AMDGPU_IRQ_STATE_DISABLE) { > + tmp = RREG32(mmCPC_INT_CNTL); > + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, > + GENERIC2_INT_ENABLE, 0); > + WREG32(mmCPC_INT_CNTL, tmp); > + > + tmp = RREG32(target); > + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, > + GENERIC2_INT_ENABLE, 0); > + WREG32(target, tmp); > + } else { > + tmp = RREG32(mmCPC_INT_CNTL); > + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, > + GENERIC2_INT_ENABLE, 1); > + WREG32(mmCPC_INT_CNTL, tmp); > + > + tmp = RREG32(target); > + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, > + GENERIC2_INT_ENABLE, 1); > + WREG32(target, tmp); > + } > + break; > + default: > + BUG(); /* kiq only support GENERIC2_INT now */ > + break; > + } > + return 0; > +} > + > +static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, > + struct amdgpu_irq_src *source, > + struct amdgpu_iv_entry *entry) > +{ > + u8 me_id, pipe_id, queue_id; > + struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data; > + > + BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); > + > + me_id = (entry->ring_id & 0x0c) >> 2; > + pipe_id = (entry->ring_id & 0x03) >> 0; > + queue_id = (entry->ring_id & 0x70) >> 4; > + DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", > + me_id, pipe_id, queue_id); > + > + amdgpu_fence_process(ring); > + return 0; > +} > + > static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { > .name = "gfx_v8_0", > .early_init = gfx_v8_0_early_init, > @@ -6485,10 +7053,37 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { > .pad_ib = amdgpu_ring_generic_pad_ib, > }; > > +static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { > + .type = AMDGPU_RING_TYPE_KIQ, > + .align_mask = 0xff, > + .nop = PACKET3(PACKET3_NOP, 0x3FFF), > + .get_rptr = gfx_v8_0_ring_get_rptr, > + .get_wptr = gfx_v8_0_ring_get_wptr_compute, > + .set_wptr = gfx_v8_0_ring_set_wptr_compute, > + .emit_frame_size = > + 20 + /* gfx_v8_0_ring_emit_gds_switch */ > + 7 + /* gfx_v8_0_ring_emit_hdp_flush */ > + 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ > + 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ > + 17 + /* gfx_v8_0_ring_emit_vm_flush */ > + 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ > + .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ > + .emit_ib = gfx_v8_0_ring_emit_ib_compute, > + .emit_fence = gfx_v8_0_ring_emit_fence_kiq, > + .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, > + .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, > + .test_ring = gfx_v8_0_ring_test_ring, > + .test_ib = gfx_v8_0_ring_test_ib, > + .insert_nop = amdgpu_ring_insert_nop, > + .pad_ib = amdgpu_ring_generic_pad_ib, > +}; > + > static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) > { > int i; > > + adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; > + > for (i = 0; i < adev->gfx.num_gfx_rings; i++) > adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; > > @@ -6511,6 +7106,11 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { > .process = gfx_v8_0_priv_inst_irq, > }; > > +static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { > + .set = gfx_v8_0_kiq_set_interrupt_state, > + .process = gfx_v8_0_kiq_irq, > +}; > + > static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) > { > adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; > @@ -6521,6 +7121,9 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) > > adev->gfx.priv_inst_irq.num_types = 1; > adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; > + > + adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; > + adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; > } > > static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h > index 11746f2..7a3863a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vid.h > +++ b/drivers/gpu/drm/amd/amdgpu/vid.h > @@ -360,6 +360,8 @@ > #define PACKET3_WAIT_ON_CE_COUNTER 0x86 > #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 > #define PACKET3_SWITCH_BUFFER 0x8B > +#define PACKET3_SET_RESOURCES 0xA0 > +#define PACKET3_MAP_QUEUES 0xA2 > > #define VCE_CMD_NO_OP 0x00000000 > #define VCE_CMD_END 0x00000001 > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx