[AMD Official Use Only - General] > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of > Bokun Zhang > Sent: Friday, October 13, 2023 1:43 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Zhang, Bokun <Bokun.Zhang@xxxxxxx> > Subject: [PATCH v3] drm/amd/amdgpu/vcn: Add RB decouple feature under > SRIOV > > - Add code to enable RB decouple feature. > This feature is controlled by SRIOV host. > Once enabled, it allows VCN0's job to be remapped to > VCN1 at hardware level and improves VCN availability > Since this feature is only used by SRIOV, we need to make sure the changes do not affect the bare-metal (BM) path. Also, please split the patch into multiple patches. > Signed-off-by: Bokun Zhang <bokun.zhang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 3 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 52 +++++++++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 ++ > drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 5 +- > drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 71 ++++++++++++++++----- > 5 files changed, 110 insertions(+), 25 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > index f4963330c772..7e8c2dbb34fb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c > @@ -204,6 +204,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device > *adev) > adev->vcn.inst[i].fw_shared.gpu_addr = adev- > >vcn.inst[i].gpu_addr + > bo_size - fw_shared_size; > > + /* clean up fw share */ > + memset(adev->vcn.inst[i].fw_shared.cpu_addr, 0, > fw_shared_size); > + > This should be redundant, since the buffer should already be cleared at allocation. 
adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; > > if (amdgpu_vcnfw_log) { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > index 0815c5a97564..6935ab74f481 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h > @@ -169,6 +169,9 @@ > #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define > AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11) #define > AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14) > +#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15) > + > +#define MAX_NUM_VCN_RB_SETUP 4 > > #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 > #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 > @@ -335,22 +338,46 @@ struct amdgpu_fw_shared { > struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; > > +struct amdgpu_vcn_rb_setup_info > +{ > + uint32_t rb_addr_lo; > + uint32_t rb_addr_hi; > + uint32_t rb_size; > +}; > + > struct amdgpu_fw_shared_rb_setup { > uint32_t is_rb_enabled_flags; > - uint32_t rb_addr_lo; > - uint32_t rb_addr_hi; > - uint32_t rb_size; > - uint32_t rb4_addr_lo; > - uint32_t rb4_addr_hi; > - uint32_t rb4_size; > - uint32_t reserved[6]; > + > + union { > + // 12 DWords This can be removed. > + struct { > + uint32_t rb_addr_lo; > + uint32_t rb_addr_hi; > + uint32_t rb_size; > + uint32_t rb4_addr_lo; > + uint32_t rb4_addr_hi; > + uint32_t rb4_size; > + uint32_t reserved[6]; > + }; > + > + // 12 DWords Same here. 
Regards, Leo > + struct { > + struct amdgpu_vcn_rb_setup_info > rb_info[MAX_NUM_VCN_RB_SETUP]; > + }; > + }; > }; > > + > struct amdgpu_fw_shared_drm_key_wa { > uint8_t method; > uint8_t reserved[3]; > }; > > +struct amdgpu_fw_shared_queue_decouple { > + uint8_t is_enabled; > + uint8_t reserved[7]; > +}; > + > struct amdgpu_vcn4_fw_shared { > uint32_t present_flag_0; > uint8_t pad[12]; > @@ -361,6 +388,8 @@ struct amdgpu_vcn4_fw_shared { > struct amdgpu_fw_shared_rb_setup rb_setup; > struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; > struct amdgpu_fw_shared_drm_key_wa drm_key_wa; > + uint8_t pad3[9]; > + struct amdgpu_fw_shared_queue_decouple decouple; > }; > > struct amdgpu_vcn_fwlog { > @@ -378,6 +407,15 @@ struct amdgpu_vcn_decode_buffer { > uint32_t pad[30]; > }; > > +struct amdgpu_vcn_rb_metadata { > + uint32_t size; > + uint32_t present_flag_0; > + > + uint8_t version; > + uint8_t ring_id; > + uint8_t pad[26]; > +}; > + > #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define > VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define > VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > index fabb83e9d9ae..858ef21ae515 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > @@ -126,6 +126,8 @@ enum AMDGIM_FEATURE_FLAG { > AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), > /* AV1 Support MODE*/ > AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), > + /* VCN RB decouple */ > + AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), > }; > > enum AMDGIM_REG_ACCESS_FLAG { > @@ -326,6 +328,8 @@ static inline bool is_virtual_machine(void) > ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) #define > amdgpu_sriov_is_av1_support(adev) \ > ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) > +#define amdgpu_sriov_is_vcn_rb_decouple(adev) \ > + ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) > bool amdgpu_virt_mmio_blocked(struct amdgpu_device 
*adev); void > amdgpu_virt_init_setting(struct amdgpu_device *adev); void > amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > index 104a5ad8397d..51a14f6d93bd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > @@ -90,10 +90,11 @@ union amd_sriov_msg_feature_flags { > uint32_t host_load_ucodes : 1; > uint32_t host_flr_vramlost : 1; > uint32_t mm_bw_management : 1; > - uint32_t pp_one_vf_mode : 1; > + uint32_t pp_one_vf_mode : 1; > uint32_t reg_indirect_acc : 1; > uint32_t av1_support : 1; > - uint32_t reserved : 25; > + uint32_t vcn_rb_decouple : 1; > + uint32_t reserved : 24; > } flags; > uint32_t all; > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > index 88e17f5e20b2..bf07aa200030 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > @@ -176,9 +176,6 @@ static int vcn_v4_0_sw_init(void *handle) > > AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSH > AKING; > } > > - if (amdgpu_sriov_vf(adev)) > - fw_shared->present_flag_0 |= > cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); > - > if (amdgpu_vcnfw_log) > amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); > } > @@ -1209,6 +1206,24 @@ static int vcn_v4_0_start(struct amdgpu_device > *adev) > return 0; > } > > +static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, > +uint32_t vcn_inst, struct amdgpu_ring *ring_enc) { > + struct amdgpu_vcn_rb_metadata *rb_metadata = NULL; > + uint8_t *rb_ptr = (uint8_t *)ring_enc->ring; > + > + rb_ptr += ring_enc->ring_size; > + rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr; > + > + memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata)); > + rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata); > + rb_metadata->present_flag_0 |= > cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); > + 
rb_metadata->present_flag_0 |= > cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); > + rb_metadata->version = 1; > + rb_metadata->ring_id = vcn_inst & 0xFF; > + > + return 0; > +} > + > static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) { > int i; > @@ -1334,11 +1349,30 @@ static int vcn_v4_0_start_sriov(struct > amdgpu_device *adev) > rb_enc_addr = ring_enc->gpu_addr; > > rb_setup->is_rb_enabled_flags |= RB_ENABLED; > - rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); > - rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); > - rb_setup->rb_size = ring_enc->ring_size / 4; > fw_shared->present_flag_0 |= > cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); > > + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { > + vcn_v4_0_init_ring_metadata(adev, i, ring_enc); > + > + memset((void *)&rb_setup->rb_info, 0, sizeof(struct > amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP); > + if (!(adev->vcn.harvest_config & (1 << 0))) { > + rb_setup->rb_info[0].rb_addr_lo = > lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); > + rb_setup->rb_info[0].rb_addr_hi = > upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); > + rb_setup->rb_info[0].rb_size = adev- > >vcn.inst[0].ring_enc[0].ring_size / 4; > + } > + if (!(adev->vcn.harvest_config & (1 << 1))) { > + rb_setup->rb_info[2].rb_addr_lo = > lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); > + rb_setup->rb_info[2].rb_addr_hi = > upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); > + rb_setup->rb_info[2].rb_size = adev- > >vcn.inst[1].ring_enc[0].ring_size / 4; > + } > + fw_shared->decouple.is_enabled = 1; > + fw_shared->present_flag_0 |= > cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); > + } else { > + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); > + rb_setup->rb_addr_hi = > upper_32_bits(rb_enc_addr); > + rb_setup->rb_size = ring_enc->ring_size / 4; > + } > + > > MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, > regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), > lower_32_bits(adev- > >vcn.inst[i].fw_shared.gpu_addr)); > @@ 
-1810,6 +1844,7 @@ static struct amdgpu_ring_funcs > vcn_v4_0_unified_ring_vm_funcs = { > .type = AMDGPU_RING_TYPE_VCN_ENC, > .align_mask = 0x3f, > .nop = VCN_ENC_CMD_NO_OP, > + .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata), > .get_rptr = vcn_v4_0_unified_ring_get_rptr, > .get_wptr = vcn_v4_0_unified_ring_get_wptr, > .set_wptr = vcn_v4_0_unified_ring_set_wptr, @@ -2023,16 +2058,20 > @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, > struct amdgpu_ { > uint32_t ip_instance; > > - switch (entry->client_id) { > - case SOC15_IH_CLIENTID_VCN: > - ip_instance = 0; > - break; > - case SOC15_IH_CLIENTID_VCN1: > - ip_instance = 1; > - break; > - default: > - DRM_ERROR("Unhandled client id: %d\n", entry->client_id); > - return 0; > + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { > + ip_instance = entry->ring_id; > + } else { > + switch (entry->client_id) { > + case SOC15_IH_CLIENTID_VCN: > + ip_instance = 0; > + break; > + case SOC15_IH_CLIENTID_VCN1: > + ip_instance = 1; > + break; > + default: > + DRM_ERROR("Unhandled client id: %d\n", entry- > >client_id); > + return 0; > + } > } > > DRM_DEBUG("IH: VCN TRAP\n"); > -- > 2.34.1