[AMD Official Use Only - General]
Yes, I have tested this.
The (!amdgpu_sriov_runtime(adev) && adev->gfx.rlc.rlcg_reg_access_supported) conditions in amdgpu_device_xcc_wreg will guarantee it will go into amdgpu_virt_rlcg_reg_rw instead of looping back to WREG32_XCC.
Thanks,
Victor
From: Lazar, Lijo <Lijo.Lazar@xxxxxxx>
Sent: Thursday, October 26, 2023 4:16 AM To: Lu, Victor Cheng Chi (Victor) <VictorChengChi.Lu@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx> Cc: Ming, Davis <Davis.Ming@xxxxxxx> Subject: Re: [PATCH 3/5] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v3) On 10/26/2023 2:22 AM, Victor Lu wrote: > amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0. > > Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC > and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter. > > v3: use W/RREG32_XCC to handle non-kiq case > > v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters > of amdgpu_device_wreg/rreg > > Signed-off-by: Victor Lu <victorchengchi.lu@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++- > .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 2 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 84 ++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +- > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +- > 8 files changed, 107 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index a2e8c2b60857..09989ebb5da3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1168,11 +1168,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, > uint32_t reg, uint32_t acc_flags); > u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, > u64 reg_addr); > +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, > + uint32_t reg, uint32_t acc_flags, > + uint32_t xcc_id); > void amdgpu_device_wreg(struct amdgpu_device *adev, > uint32_t reg, uint32_t v, > uint32_t acc_flags); > void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, > u64 reg_addr, u32 reg_data); > +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, > + uint32_t reg, uint32_t v, > + uint32_t acc_flags, > + uint32_t xcc_id); > void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, > uint32_t reg, uint32_t v, uint32_t xcc_id); > void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); > @@ -1213,8 +1220,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); > #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) > #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) > > -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) > -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) > +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) > +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) > > #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) > #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) > @@ -1224,6 +1231,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); > #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) > #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) > #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) > +#define RREG32_XCC(reg, flag, inst) amdgpu_device_xcc_rreg(adev, (reg), flag, inst) > +#define WREG32_XCC(reg, v, flag, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), flag, inst) > #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) > #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) > #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c > index 490c8f5ddb60..c94df54e2657 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c > @@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, > hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); > > for (reg = hqd_base; reg <= hqd_end; reg++) > - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); > + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst); > > > /* Activate doorbell logic before triggering WPTR poll. */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index 51011e8ee90d..47c8c334c779 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, > > for (reg = hqd_base; > reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) > - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); > + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst); > > > /* Activate doorbell logic before triggering WPTR poll. */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 7ec32b44df05..9a35088b990a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -471,7 +471,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, > if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && > amdgpu_sriov_runtime(adev) && > down_read_trylock(&adev->reset_domain->sem)) { > - ret = amdgpu_kiq_rreg(adev, reg); > + ret = amdgpu_kiq_rreg(adev, reg, 0); > up_read(&adev->reset_domain->sem); > } else { > ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); > @@ -508,6 +508,48 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) > BUG(); > } > > + > +/** > + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register > + * > + * @adev: amdgpu_device pointer > + * @reg: dword aligned register offset > + * @acc_flags: access flags which require special behavior > + * @xcc_id: xcc accelerated compute core id > + * > + * Returns the 32 bit value from the offset specified. > + */ > +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, > + uint32_t reg, uint32_t acc_flags, > + uint32_t xcc_id) > +{ > + uint32_t ret; > + > + if (amdgpu_device_skip_hw_access(adev)) > + return 0; > + > + if ((reg * 4) < adev->rmmio_size) { > + if ((acc_flags & AMDGPU_REGS_RLC) && > + (!amdgpu_sriov_runtime(adev)) && > + adev->gfx.rlc.rlcg_reg_access_supported) { > + amdgpu_sriov_rreg(adev, reg, acc_flags, GC_HWIP, xcc_id); > + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && > + amdgpu_sriov_runtime(adev) && > + down_read_trylock(&adev->reset_domain->sem)) { > + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); > + up_read(&adev->reset_domain->sem); > + } else { > + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); > + } > + } else { > + ret = adev->pcie_rreg(adev, reg * 4); > + } > + > + trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); > + > + return ret; > +} > + > /* > * MMIO register write with bytes helper functions > * @offset:bytes offset from MMIO start > @@ -555,7 +597,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, > if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && > amdgpu_sriov_runtime(adev) && > down_read_trylock(&adev->reset_domain->sem)) { > - amdgpu_kiq_wreg(adev, reg, v); > + amdgpu_kiq_wreg(adev, reg, v, 0); > up_read(&adev->reset_domain->sem); > } else { > writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); > @@ -596,6 +638,44 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, > } > } > > +/** > + * amdgpu_device_wreg - write to a memory mapped IO or indirect register with specific XCC > + * > + * @adev: amdgpu_device pointer > + * @reg: dword aligned register offset > + * @v: 32 bit value to write to the register > + * @acc_flags: access flags which require special behavior > + * @xcc_id: xcc accelerated compute core id > + * > + * Writes the value specified to the offset specified. > + */ > +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, > + uint32_t reg, uint32_t v, > + uint32_t acc_flags, uint32_t xcc_id) > +{ > + if (amdgpu_device_skip_hw_access(adev)) > + return; > + > + if ((reg * 4) < adev->rmmio_size) { > + if ((acc_flags & AMDGPU_REGS_RLC) && > + (!amdgpu_sriov_runtime(adev)) && > + adev->gfx.rlc.rlcg_reg_access_supported) { > + amdgpu_sriov_wreg(adev, reg, v, acc_flags, GC_HWIP, xcc_id); I see this path WREG32_XCC -> amdgpu_device_xcc_wreg -> amdgpu_sriov_wreg -> WREG32_XCC Similar for rreg. I'm not able to work out the RLC/sriov runtime conditions. Is this tested before submitting? Thanks, Lijo > + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && > + amdgpu_sriov_runtime(adev) && > + down_read_trylock(&adev->reset_domain->sem)) { > + amdgpu_kiq_wreg(adev, reg, v, xcc_id); > + up_read(&adev->reset_domain->sem); > + } else { > + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); > + } > + } else { > + adev->pcie_wreg(adev, reg * 4, v); > + } > + > + trace_amdgpu_device_wreg(adev->pdev->device, reg, v); > +} > + > /** > * amdgpu_device_indirect_rreg - read an indirect register > * > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index c92e0aba69e1..60ae4bfdc7f5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -929,12 +929,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, > func(adev, ras_error_status, i); > } > > -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id) > { > signed long r, cnt = 0; > unsigned long flags; > uint32_t seq, reg_val_offs = 0, value = 0; > - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; > struct amdgpu_ring *ring = &kiq->ring; > > if (amdgpu_device_skip_hw_access(adev)) > @@ -997,12 +997,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > return ~0; > } > > -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) > +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) > { > signed long r, cnt = 0; > unsigned long flags; > uint32_t seq; > - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; > + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; > struct amdgpu_ring *ring = &kiq->ring; > > BUG_ON(!ring->funcs->emit_wreg); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 7088c5015675..f23bafec71c5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -521,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, > int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, > struct amdgpu_irq_src *source, > struct amdgpu_iv_entry *entry); > -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); > -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); > +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); > +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); > int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); > void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > index a0aa624f5a92..c6c8f4fed0c1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > @@ -1076,7 +1076,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev, > if (acc_flags & AMDGPU_REGS_NO_KIQ) > WREG32_NO_KIQ(offset, value); > else > - WREG32(offset, value); > + WREG32_XCC(offset, value, acc_flags, xcc_id); > } > > u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, > @@ -1091,5 +1091,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, > if (acc_flags & AMDGPU_REGS_NO_KIQ) > return RREG32_NO_KIQ(offset); > else > - return RREG32(offset); > + return RREG32_XCC(offset, acc_flags, xcc_id); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > index 386804f2e95c..b24db7974311 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > @@ -2739,16 +2739,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( > > switch (state) { > case AMDGPU_IRQ_STATE_DISABLE: > - mec_int_cntl = RREG32(mec_int_cntl_reg); > + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, AMDGPU_REGS_RLC, xcc_id); > mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, > TIME_STAMP_INT_ENABLE, 0); > - WREG32(mec_int_cntl_reg, mec_int_cntl); > + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, AMDGPU_REGS_RLC, xcc_id); > break; > case AMDGPU_IRQ_STATE_ENABLE: > - mec_int_cntl = RREG32(mec_int_cntl_reg); > + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, AMDGPU_REGS_RLC, xcc_id); > mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, > TIME_STAMP_INT_ENABLE, 1); > - WREG32(mec_int_cntl_reg, mec_int_cntl); > + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, AMDGPU_REGS_RLC, xcc_id); > break; > default: > break; |