On 6/19/2024 10:01 PM, Jane Jian wrote: > [WHY] > sriov has the higher bit violation when flushing tlb > > [HOW] > normalize the registers to keep lower 16-bit(dword aligned) to aviod higher bit violation > RLCG will mask xcd out and always assume it's accessing its own xcd > > [TODO] > later will add the normalization in sriovw/rreg after fixing bugs > > v2 > rename the normalized macro, add ip block type for further use > In subject, part I etc. doesn't look good. Only put the intent - like normalize xcc register offsets for TLB flush. Rest of the story may be put in description. > Signed-off-by: Jane Jian <Jane.Jian@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ > drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 16 ++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 ++++++++-- > drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + > drivers/gpu/drm/amd/amdgpu/soc15.h | 1 + > drivers/gpu/drm/amd/amdgpu/soc15_common.h | 5 ++++- > 6 files changed, 32 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 083f353cff6e..eb2e7312bf1b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -636,6 +636,8 @@ struct amdgpu_asic_funcs { > ssize_t (*get_reg_state)(struct amdgpu_device *adev, > enum amdgpu_reg_state reg_state, void *buf, > size_t max_size); > + /* normalize offset to keep in lower 16-bit */ > + u32 (*normalize_reg_offset)(u32 hwip, u32 offset); Please change the hwip argument type to enum amd_hw_ip_block_type. > }; > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c > index 2c9a0aa41e2d..9b4bea2ca7df 100644 > --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c > +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c > @@ -1085,3 +1085,19 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, > > return size; > } > + > +u32 aqua_vanjaram_normalize_reg_offset(u32 hwip, u32 offset) > +{ > + u32 normalized_offset; > + > + switch (hwip) { > + case GC_HWIP: > + normalized_offset = offset & 0xffff; > + break; > + default: > + normalized_offset = offset; > + break; > + } > + > + return normalized_offset; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 88b4644f8e96..1d24e19f304d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -853,8 +853,14 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > */ > if (adev->gfx.kiq[inst].ring.sched.ready && > (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { > - uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; > - uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; > + > + /* Select lower 16 bits to write in local xcc > + * for MMHUB it uses xcc0, NO cross AID reg offset > + */ The comment about MMHUB is confusing. MMHUB offset in the current form will be SOC wide, though it is passed to xcc0. Better remove it. > + if (AMDGPU_IS_GFXHUB(vmhub)) { > + req = NORMALIZE_XCC_REG_OFFSET(GC, req); Since IP is parameter, naming the macro as XCC_REG_OFFSET doesn't suit. Thanks, Lijo > + ack = NORMALIZE_XCC_REG_OFFSET(GC, ack); > + } > > amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, > 1 << vmid, inst); > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c > index 8d16dacdc172..e6e61fc77080 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > @@ -927,6 +927,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = > .query_video_codecs = &soc15_query_video_codecs, > .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, > .get_reg_state = &aqua_vanjaram_get_reg_state, > + .normalize_reg_offset = &aqua_vanjaram_normalize_reg_offset, > }; > > static int soc15_common_early_init(void *handle) > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h > index 282584a48be0..f1e974604e3e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.h > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h > @@ -124,4 +124,5 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, > void vega10_doorbell_index_init(struct amdgpu_device *adev); > void vega20_doorbell_index_init(struct amdgpu_device *adev); > void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev); > +u32 aqua_vanjaram_normalize_reg_offset(u32 hwip, u32 offset); > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h > index 242b24f73c17..ddf0aad51821 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h > +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h > @@ -210,4 +210,7 @@ > #define WREG64_MCA(ext, mca_base, idx, val) \ > WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val) > > -#endif > +#define NORMALIZE_XCC_REG_OFFSET(ip, offset) \ > + ((amdgpu_sriov_vf(adev) && adev->asic_funcs->normalize_reg_offset) ? \ > + adev->asic_funcs->normalize_reg_offset(ip##_HWIP, offset) : offset) > +#endif > \ No newline at end of file