[AMD Official Use Only - AMD Internal Distribution Only] Hi Christian, The reason that why I moved around amdgpu_asic_funcs because the new function that I declared in amdgpu_asic_funcs used the enum amd_hw_ip_block_type, therefore I move it after the definition of the amd_hw_ip_block_type. Thanks, Jane -----Original Message----- From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx> Sent: Monday, June 24, 2024 5:35 PM To: Jian, Jane <Jane.Jian@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Chang, HaiJun <HaiJun.Chang@xxxxxxx>; Zhao, Victor <Victor.Zhao@xxxxxxx> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Subject: Re: [PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov in TLB flush Am 24.06.24 um 11:13 schrieb Jane Jian: > [WHY] > sriov has the higher bit violation when flushing tlb > > [HOW] > normalize the registers to keep lower 16-bit(dword aligned) to aviod > higher bit violation RLCG will mask xcd out and always assume it's > accessing its own xcd > > [TODO] > later will add the normalization in sriovw/rreg after fixing bugs > > v2 > rename the normalized macro, add ip block type for further use move > asics func declaration after ip block type since new func refers ip > block type add normalization in emit flush tlb as well > > v3 > declare the new func in the asic specific header > > Signed-off-by: Jane Jian <Jane.Jian@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 112 +++++++++++---------- > drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 17 ++++ > drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h | 28 ++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 32 ++++-- > drivers/gpu/drm/amd/amdgpu/soc15.c | 2 + > drivers/gpu/drm/amd/amdgpu/soc15_common.h | 5 +- > 6 files changed, 130 insertions(+), 66 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 083f353cff6e..070fd9e601fe 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -583,61 +583,6 @@ struct amdgpu_video_codecs { > const struct amdgpu_video_codec_info *codec_array; > }; > > -/* > - * ASIC specific functions. > - */ > -struct amdgpu_asic_funcs { > - bool (*read_disabled_bios)(struct amdgpu_device *adev); > - bool (*read_bios_from_rom)(struct amdgpu_device *adev, > - u8 *bios, u32 length_bytes); > - int (*read_register)(struct amdgpu_device *adev, u32 se_num, > - u32 sh_num, u32 reg_offset, u32 *value); > - void (*set_vga_state)(struct amdgpu_device *adev, bool state); > - int (*reset)(struct amdgpu_device *adev); > - enum amd_reset_method (*reset_method)(struct amdgpu_device *adev); > - /* get the reference clock */ > - u32 (*get_xclk)(struct amdgpu_device *adev); > - /* MM block clocks */ > - int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk); > - int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk); > - /* static power management */ > - int (*get_pcie_lanes)(struct amdgpu_device *adev); > - void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes); > - /* get config memsize register */ > - u32 (*get_config_memsize)(struct amdgpu_device *adev); > - /* flush hdp write queue */ > - void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); > - /* invalidate hdp read cache */ > - void (*invalidate_hdp)(struct amdgpu_device *adev, > - struct amdgpu_ring *ring); > - /* check if the asic needs a full reset of if soft reset will work */ > - bool (*need_full_reset)(struct amdgpu_device *adev); > - /* initialize doorbell layout for specific asic*/ > - void (*init_doorbell_index)(struct amdgpu_device *adev); > - /* PCIe bandwidth usage */ > - void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, > - uint64_t *count1); > - /* do we need to reset the asic at init time (e.g., kexec) */ > - bool (*need_reset_on_init)(struct amdgpu_device *adev); > - /* PCIe replay counter */ > - uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); > - /* device supports BACO */ > - int (*supports_baco)(struct amdgpu_device *adev); > - /* pre asic_init quirks */ > - void (*pre_asic_init)(struct amdgpu_device *adev); > - /* enter/exit umd stable pstate */ > - int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter); > - /* query video codecs */ > - int (*query_video_codecs)(struct amdgpu_device *adev, bool encode, > - const struct amdgpu_video_codecs **codecs); > - /* encode "> 32bits" smn addressing */ > - u64 (*encode_ext_smn_addressing)(int ext_id); > - > - ssize_t (*get_reg_state)(struct amdgpu_device *adev, > - enum amdgpu_reg_state reg_state, void *buf, > - size_t max_size); > -}; > - > /* > * IOCTL. > */ > @@ -728,6 +673,63 @@ enum amd_hw_ip_block_type { > MAX_HWIP > }; > > +/* > + * ASIC specific functions. > + */ > +struct amdgpu_asic_funcs { > + bool (*read_disabled_bios)(struct amdgpu_device *adev); > + bool (*read_bios_from_rom)(struct amdgpu_device *adev, > + u8 *bios, u32 length_bytes); > + int (*read_register)(struct amdgpu_device *adev, u32 se_num, > + u32 sh_num, u32 reg_offset, u32 *value); > + void (*set_vga_state)(struct amdgpu_device *adev, bool state); > + int (*reset)(struct amdgpu_device *adev); > + enum amd_reset_method (*reset_method)(struct amdgpu_device *adev); > + /* get the reference clock */ > + u32 (*get_xclk)(struct amdgpu_device *adev); > + /* MM block clocks */ > + int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk); > + int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk); > + /* static power management */ > + int (*get_pcie_lanes)(struct amdgpu_device *adev); > + void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes); > + /* get config memsize register */ > + u32 (*get_config_memsize)(struct amdgpu_device *adev); > + /* flush hdp write queue */ > + void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); > + /* invalidate hdp read cache */ > + void (*invalidate_hdp)(struct amdgpu_device *adev, > + struct amdgpu_ring *ring); > + /* check if the asic needs a full reset of if soft reset will work */ > + bool (*need_full_reset)(struct amdgpu_device *adev); > + /* initialize doorbell layout for specific asic*/ > + void (*init_doorbell_index)(struct amdgpu_device *adev); > + /* PCIe bandwidth usage */ > + void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, > + uint64_t *count1); > + /* do we need to reset the asic at init time (e.g., kexec) */ > + bool (*need_reset_on_init)(struct amdgpu_device *adev); > + /* PCIe replay counter */ > + uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); > + /* device supports BACO */ > + int (*supports_baco)(struct amdgpu_device *adev); > + /* pre asic_init quirks */ > + void (*pre_asic_init)(struct amdgpu_device *adev); > + /* enter/exit umd stable pstate */ > + int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter); > + /* query video codecs */ > + int (*query_video_codecs)(struct amdgpu_device *adev, bool encode, > + const struct amdgpu_video_codecs **codecs); > + /* encode "> 32bits" smn addressing */ > + u64 (*encode_ext_smn_addressing)(int ext_id); > + > + ssize_t (*get_reg_state)(struct amdgpu_device *adev, > + enum amdgpu_reg_state reg_state, void *buf, > + size_t max_size); > + /* normalize offset to keep in lower 16-bit */ > + u32 (*normalize_reg_offset)(enum amd_hw_ip_block_type hwip, u32 > +offset); }; > + Why are you moving this definition around? Regards, Christian. > #define HWIP_MAX_INSTANCE 44 > > #define HW_ID_MAX 300 > diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c > b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c > index 2c9a0aa41e2d..7cdd4b9d08ba 100644 > --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c > +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c > @@ -29,6 +29,7 @@ > #include "gfx_v9_4_3.h" > #include "gfxhub_v1_2.h" > #include "sdma_v4_4_2.h" > +#include "aqua_vanjaram.h" > > #define XCP_INST_MASK(num_inst, xcp_id) \ > (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0) @@ > -1085,3 +1086,19 @@ ssize_t aqua_vanjaram_get_reg_state(struct > amdgpu_device *adev, > > return size; > } > + > +u32 aqua_vanjaram_normalize_reg_offset(enum amd_hw_ip_block_type > +hwip, u32 offset) { > + u32 normalized_offset; > + > + switch (hwip) { > + case GC_HWIP: > + normalized_offset = offset & 0xffff; > + break; > + default: > + normalized_offset = offset; > + break; > + } > + > + return normalized_offset; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h > b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h > new file mode 100644 > index 000000000000..8d1b7a89cb71 > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h > @@ -0,0 +1,28 @@ > +/* > + * Copyright 2024 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person > +obtaining a > + * copy of this software and associated documentation files (the > +"Software"), > + * to deal in the Software without restriction, including without > +limitation > + * the rights to use, copy, modify, merge, publish, distribute, > +sublicense, > + * and/or sell copies of the Software, and to permit persons to whom > +the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be > +included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > +EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > +MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > +SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > +DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > +OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > +OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + */ > +#ifndef __AQUA_VANJARAM_H__ > +#define __AQUA_VANJARAM_H__ > + > +u32 aqua_vanjaram_normalize_reg_offset(enum amd_hw_ip_block_type > +hwip, u32 offset); > + > +#endif > \ No newline at end of file > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 88b4644f8e96..19e4429db37c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -853,8 +853,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > */ > if (adev->gfx.kiq[inst].ring.sched.ready && > (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { > - uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; > - uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; > + > + /* Select lower 16 bits to write in local xcc */ > + if (AMDGPU_IS_GFXHUB(vmhub)) { > + req = NORMALIZE_XCC_REG_OFFSET(req); > + ack = NORMALIZE_XCC_REG_OFFSET(ack); > + } > > amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, > 1 << vmid, inst); > @@ -979,6 +983,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; > uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); > unsigned int eng = ring->vm_inv_eng; > + u32 low_distance, high_distance, req_offset, ack; > > /* > * It may lose gpuvm invalidate acknowldege state across > power-gating @@ -986,7 +991,18 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > * release after invalidation to avoid entering power gated state > * to WA the Issue > */ > + low_distance = hub->ctx0_ptb_addr_lo32 + (hub->ctx_addr_distance * vmid); > + high_distance = hub->ctx0_ptb_addr_hi32 + (hub->ctx_addr_distance * vmid); > + req_offset = hub->vm_inv_eng0_req + hub->eng_distance * eng; > + ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; > > + /* Select lower 16 bits to write in local xcc */ > + if (AMDGPU_IS_GFXHUB(ring->vm_hub)) { > + low_distance = NORMALIZE_XCC_REG_OFFSET(low_distance); > + high_distance = NORMALIZE_XCC_REG_OFFSET(high_distance); > + req_offset = NORMALIZE_XCC_REG_OFFSET(req_offset); > + ack = NORMALIZE_XCC_REG_OFFSET(ack); > + } > /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ > if (use_semaphore) > /* a read return value of 1 means semaphore acuqire */ @@ -994,18 > +1010,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > hub->vm_inv_eng0_sem + > hub->eng_distance * eng, 0x1, 0x1); > > - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + > - (hub->ctx_addr_distance * vmid), > + amdgpu_ring_emit_wreg(ring, low_distance, > lower_32_bits(pd_addr)); > > - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + > - (hub->ctx_addr_distance * vmid), > + amdgpu_ring_emit_wreg(ring, high_distance, > upper_32_bits(pd_addr)); > > - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + > - hub->eng_distance * eng, > - hub->vm_inv_eng0_ack + > - hub->eng_distance * eng, > + amdgpu_ring_emit_reg_write_reg_wait(ring, req_offset, > + ack, > req, 1 << vmid); > > /* TODO: It needs to continue working on debugging with semaphore > for GFXHUB as well. */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c > b/drivers/gpu/drm/amd/amdgpu/soc15.c > index 8d16dacdc172..3a1fa2797f02 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > @@ -78,6 +78,7 @@ > #include "mxgpu_ai.h" > #include "amdgpu_ras.h" > #include "amdgpu_xgmi.h" > +#include "aqua_vanjaram.h" > #include <uapi/linux/kfd_ioctl.h> > > #define mmMP0_MISC_CGTT_CTRL0 0x01b9 > @@ -927,6 +928,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = > .query_video_codecs = &soc15_query_video_codecs, > .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, > .get_reg_state = &aqua_vanjaram_get_reg_state, > + .normalize_reg_offset = &aqua_vanjaram_normalize_reg_offset, > }; > > static int soc15_common_early_init(void *handle) diff --git > a/drivers/gpu/drm/amd/amdgpu/soc15_common.h > b/drivers/gpu/drm/amd/amdgpu/soc15_common.h > index 242b24f73c17..01afd1a24e8b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h > +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h > @@ -210,4 +210,7 @@ > #define WREG64_MCA(ext, mca_base, idx, val) \ > WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + > mca_base + (idx * 8), val) > > -#endif > +#define NORMALIZE_XCC_REG_OFFSET(offset) \ > + ((amdgpu_sriov_vf(adev) && adev->asic_funcs->normalize_reg_offset) ? \ > + adev->asic_funcs->normalize_reg_offset(GC_HWIP, offset) : offset) > +#endif > \ No newline at end of file