[AMD Official Use Only - General]

Thanks Leo. I'll restore the check for sriov before calling
amdgpu_virt_alloc_mm_table(). That will make it consistent with other
vcn ip versions. I'll retain the check for sriov inside
amdgpu_virt_alloc_mm_table() as well, as a conservative check.
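Roughly something like the following (just a sketch of the intent, assuming the same pattern the other VCN versions such as vcn_v4_0.c already use; not the final patch):

	/* sw_init: the MMSCH memory table is only used when running as an SR-IOV VF */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}

with the matching guard on the sw_fini side:

	/* sw_fini: free the table that was allocated for the VF */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);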
Thanks,
Samir

-----Original Message-----
From: Liu, Leo <Leo.Liu@xxxxxxx>
Sent: Tuesday, August 8, 2023 8:29 AM
To: Dhume, Samir <Samir.Dhume@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Luo, Zhigang <Zhigang.Luo@xxxxxxx>; Chen, Guchun <Guchun.Chen@xxxxxxx>; Wan, Gavin <Gavin.Wan@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Min, Frank <Frank.Min@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>
Subject: Re: [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3

On 2023-07-28 15:15, Samir Dhume wrote:
> initialization table handshake with mmsch
>
> Signed-off-by: Samir Dhume <samir.dhume@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
>  1 file changed, 233 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index 411c1d802823..b978265b2d77 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -31,6 +31,7 @@
>  #include "soc15d.h"
>  #include "soc15_hw_ip.h"
>  #include "vcn_v2_0.h"
> +#include "mmsch_v4_0_3.h"
>
>  #include "vcn/vcn_4_0_3_offset.h"
>  #include "vcn/vcn_4_0_3_sh_mask.h"
> @@ -44,6 +45,7 @@
>  #define VCN_VID_SOC_ADDRESS_2_0		0x1fb00
>  #define VCN1_VID_SOC_ADDRESS_3_0	0x48300
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
>  static int vcn_v4_0_3_set_powergating_state(void *handle,
> @@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
>  		amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
>  	}
>
> +	r = amdgpu_virt_alloc_mm_table(adev);

Since this function is not for bare-metal, please move the amdgpu_sriov_vf() check from inside of the function to here, to avoid confusion.

> +	if (r)
> +		return r;
> +
>  	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
>  		adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
>
> @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
>  		drm_dev_exit(idx);
>  	}
>
> +	amdgpu_virt_free_mm_table(adev);

Same as above.

Regards,
Leo

> +
>  	r = amdgpu_vcn_suspend(adev);
>  	if (r)
>  		return r;
> @@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
>  	struct amdgpu_ring *ring;
>  	int i, r, vcn_inst;
>
> -	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> -		vcn_inst = GET_INST(VCN, i);
> -		ring = &adev->vcn.inst[i].ring_enc[0];
> +	if (amdgpu_sriov_vf(adev)) {
> +		r = vcn_v4_0_3_start_sriov(adev);
> +		if (r)
> +			goto done;
>
> -		if (ring->use_doorbell) {
> -			adev->nbio.funcs->vcn_doorbell_range(
> -				adev, ring->use_doorbell,
> -				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> -					9 * vcn_inst,
> -				adev->vcn.inst[i].aid_id);
> -
> -			WREG32_SOC15(
> -				VCN, GET_INST(VCN, ring->me),
> -				regVCN_RB1_DB_CTRL,
> -				ring->doorbell_index
> -					<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> -				VCN_RB1_DB_CTRL__EN_MASK);
> -
> -			/* Read DB_CTRL to flush the write DB_CTRL command. */
> -			RREG32_SOC15(
> -				VCN, GET_INST(VCN, ring->me),
> -				regVCN_RB1_DB_CTRL);
> +		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +			ring = &adev->vcn.inst[i].ring_enc[0];
> +			ring->wptr = 0;
> +			ring->wptr_old = 0;
> +			vcn_v4_0_3_unified_ring_set_wptr(ring);
> +			ring->sched.ready = true;
>  		}
> +	} else {
> +		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +			vcn_inst = GET_INST(VCN, i);
> +			ring = &adev->vcn.inst[i].ring_enc[0];
> +
> +			if (ring->use_doorbell) {
> +				adev->nbio.funcs->vcn_doorbell_range(
> +					adev, ring->use_doorbell,
> +					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> +						9 * vcn_inst,
> +					adev->vcn.inst[i].aid_id);
> +
> +				WREG32_SOC15(
> +					VCN, GET_INST(VCN, ring->me),
> +					regVCN_RB1_DB_CTRL,
> +					ring->doorbell_index
> +						<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> +					VCN_RB1_DB_CTRL__EN_MASK);
> +
> +				/* Read DB_CTRL to flush the write DB_CTRL command. */
> +				RREG32_SOC15(
> +					VCN, GET_INST(VCN, ring->me),
> +					regVCN_RB1_DB_CTRL);
> +			}
>
> -		r = amdgpu_ring_test_helper(ring);
> -		if (r)
> -			goto done;
> +			r = amdgpu_ring_test_helper(ring);
> +			if (r)
> +				goto done;
> +		}
>  	}
>
>  done:
> @@ -813,6 +835,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
>  	return 0;
>  }
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
> +{
> +	int i, vcn_inst;
> +	struct amdgpu_ring *ring_enc;
> +	uint64_t cache_addr;
> +	uint64_t rb_enc_addr;
> +	uint64_t ctx_addr;
> +	uint32_t param, resp, expected;
> +	uint32_t offset, cache_size;
> +	uint32_t tmp, timeout;
> +
> +	struct amdgpu_mm_table *table = &adev->virt.mm_table;
> +	uint32_t *table_loc;
> +	uint32_t table_size;
> +	uint32_t size, size_dw;
> +	uint32_t init_status;
> +	uint32_t enabled_vcn;
> +
> +	struct mmsch_v4_0_cmd_direct_write
> +		direct_wt = { {0} };
> +	struct mmsch_v4_0_cmd_direct_read_modify_write
> +		direct_rd_mod_wt = { {0} };
> +	struct mmsch_v4_0_cmd_end end = { {0} };
> +	struct mmsch_v4_0_3_init_header header;
> +
> +	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
> +	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
> +
> +	direct_wt.cmd_header.command_type =
> +		MMSCH_COMMAND__DIRECT_REG_WRITE;
> +	direct_rd_mod_wt.cmd_header.command_type =
> +		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
> +	end.cmd_header.command_type = MMSCH_COMMAND__END;
> +
> +	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> +		vcn_inst = GET_INST(VCN, i);
> +
> +		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
> +		header.version = MMSCH_VERSION;
> +		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
> +
> +		table_loc = (uint32_t *)table->cpu_addr;
> +		table_loc += header.total_size;
> +
> +		table_size = 0;
> +
> +		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
> +			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
> +
> +		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
> +
> +		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
> +			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> +				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
> +
> +			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> +				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
> +
> +			offset = 0;
> +			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +				regUVD_VCPU_CACHE_OFFSET0), 0);
> +		} else {
> +			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> +				lower_32_bits(adev->vcn.inst[i].gpu_addr));
> +			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> +				upper_32_bits(adev->vcn.inst[i].gpu_addr));
> +			offset = cache_size;
> +			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +				regUVD_VCPU_CACHE_OFFSET0),
> +				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
> +		}
> +
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_VCPU_CACHE_SIZE0),
> +			cache_size);
> +
> +		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_VCPU_CACHE_OFFSET1), 0);
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
> +
> +		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
> +			AMDGPU_VCN_STACK_SIZE;
> +
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> +
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> +
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_VCPU_CACHE_OFFSET2), 0);
> +
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
> +
> +		fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
> +		rb_setup = &fw_shared->rb_setup;
> +
> +		ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
> +		ring_enc->wptr = 0;
> +		rb_enc_addr = ring_enc->gpu_addr;
> +
> +		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
> +		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
> +		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
> +		rb_setup->rb_size = ring_enc->ring_size / 4;
> +		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
> +
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
> +			lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
> +			upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> +		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> +			regUVD_VCPU_NONCACHE_SIZE0),
> +			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
> +		MMSCH_V4_0_INSERT_END();
> +
> +		header.vcn0.init_status = 0;
> +		header.vcn0.table_offset = header.total_size;
> +		header.vcn0.table_size = table_size;
> +		header.total_size += table_size;
> +
> +		/* Send init table to mmsch */
> +		size = sizeof(struct mmsch_v4_0_3_init_header);
> +		table_loc = (uint32_t *)table->cpu_addr;
> +		memcpy((void *)table_loc, &header, size);
> +
> +		ctx_addr = table->gpu_addr;
> +		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
> +		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
> +
> +		tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
> +		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
> +		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
> +		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
> +
> +		size = header.total_size;
> +		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
> +
> +		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
> +
> +		param = 0x00000001;
> +		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
> +		tmp = 0;
> +		timeout = 1000;
> +		resp = 0;
> +		expected = MMSCH_VF_MAILBOX_RESP__OK;
> +		while (resp != expected) {
> +			resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
> +			if (resp != 0)
> +				break;
> +
> +			udelay(10);
> +			tmp = tmp + 10;
> +			if (tmp >= timeout) {
> +				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
> +					" waiting for regMMSCH_VF_MAILBOX_RESP "\
> +					"(expected=0x%08x, readback=0x%08x)\n",
> +					tmp, expected, resp);
> +				return -EBUSY;
> +			}
> +		}
> +
> +		enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
> +		init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
> +		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
> +			&& init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
> +			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
> +				"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  /**
>   * vcn_v4_0_3_start - VCN start
>   *