See inline. On 2019-04-23 3:23 p.m., Zeng, Oak wrote: > Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL > to an empty page in mmio space. We will later map this page to process > space so application can flush hdp. This can't be done properly at > those registers' original location because it will expose more than > desired registers to process space. > > v2: Use explicit register hole location > v3: Moved remapped hdp registers into adev struct > v4: Use more generic name for remapped page > Expose register offset in kfd_ioctl.h > v5: Move hdp register remap function to nbio ip function > > Change-Id: Ia8d27c0c9a082711d16bbf55602bf5712a47b6d6 > Signed-off-by: Oak Zeng <Oak.Zeng@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++++ > drivers/gpu/drm/amd/amdgpu/cik.c | 1 + > drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 15 ++++++++++++--- > drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 15 ++++++++++++--- > drivers/gpu/drm/amd/amdgpu/si.c | 1 + > drivers/gpu/drm/amd/amdgpu/soc15.c | 11 +++++++++++ > drivers/gpu/drm/amd/amdgpu/vi.c | 1 + > include/uapi/linux/kfd_ioctl.h | 7 +++++++ > 8 files changed, 52 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index bc96ec4..e16dcee 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -642,6 +642,11 @@ struct nbio_hdp_flush_reg { > u32 ref_and_mask_sdma1; > }; > > +struct amdgpu_mmio_remap { > + u32 reg_offset; > + resource_size_t bus_addr; > +}; > + > struct amdgpu_nbio_funcs { > const struct nbio_hdp_flush_reg *hdp_flush_reg; > u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); > @@ -669,6 +674,7 @@ struct amdgpu_nbio_funcs { > void (*ih_control)(struct amdgpu_device *adev); > void (*init_registers)(struct amdgpu_device *adev); > void (*detect_hw_virt)(struct amdgpu_device *adev); > + void (*remap_hdp_registers)(struct amdgpu_device *adev); > }; > > struct amdgpu_df_funcs { > @@ -767,6 +773,7 @@ struct amdgpu_device { > void __iomem *rmmio; > /* protects concurrent MM_INDEX/DATA based register access */ > spinlock_t mmio_idx_lock; > + struct amdgpu_mmio_remap rmmio_remap; > /* protects concurrent SMC based register access */ > spinlock_t smc_idx_lock; > amdgpu_rreg_t smc_rreg; > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c > index 07c1f23..3f7ec6a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > @@ -1827,6 +1827,7 @@ static int cik_common_early_init(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > + adev->rmmio_remap.bus_addr = ULLONG_MAX; > adev->smc_rreg = &cik_smc_rreg; > adev->smc_wreg = &cik_smc_wreg; > adev->pcie_rreg = &cik_pcie_rreg; > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > index 1cdb98a..83f1f75 100644 > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c > @@ -29,9 +29,18 @@ > #include "nbio/nbio_7_0_sh_mask.h" > #include "nbio/nbio_7_0_smn.h" > #include "vega10_enum.h" > +#include <uapi/linux/kfd_ioctl.h> > > #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c > > +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev) > +{ > + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, > + adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL); I don't think this does what you intend. I think + binds stronger than <<, so you should write this as (adev->rmmio_remap.reg_offset << 2) + HDP_MEM_FLUSH_CNTL > + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, > + adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL); Same as above. > +} > + > static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) > { > u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); > @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, > struct amdgpu_ring *ring) > { > if (!ring || !ring->funcs->emit_wreg) > - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); > + WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); Are you sure this is correct? As I understand it from the above, adev->rmmio_remap.reg_offset is in dwords, HDP_MEM_FLUSH_CNTL is in bytes. Something will need to be shifted. > else > - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( > - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); > + amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); Same as above. > } > > static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) > @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { > .ih_control = nbio_v7_0_ih_control, > .init_registers = nbio_v7_0_init_registers, > .detect_hw_virt = nbio_v7_0_detect_hw_virt, > + .remap_hdp_registers = nbio_v7_0_remap_hdp_registers, > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c > index c69d515..fa67772 100644 > --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c > +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c > @@ -27,9 +27,18 @@ > #include "nbio/nbio_7_4_offset.h" > #include "nbio/nbio_7_4_sh_mask.h" > #include "nbio/nbio_7_4_0_smn.h" > +#include <uapi/linux/kfd_ioctl.h> > > #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c > > +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) > +{ > + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, > + adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL); > + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, > + adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL); Operator precedence. See above. > +} > + > static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) > { > u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); > @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, > struct amdgpu_ring *ring) > { > if (!ring || !ring->funcs->emit_wreg) > - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); > + WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); Are adev->rmmio_remap.reg_offset and HDP_MEM_FLUSH_CNTL in the same units? > else > - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( > - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); > + amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); > } > > static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) > @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { > .ih_control = nbio_v7_4_ih_control, > .init_registers = nbio_v7_4_init_registers, > .detect_hw_virt = nbio_v7_4_detect_hw_virt, > + .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c > index 9d8df68..c6b89c2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/si.c > +++ b/drivers/gpu/drm/amd/amdgpu/si.c > @@ -1405,6 +1405,7 @@ static int si_common_early_init(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > + adev->rmmio_remap.bus_addr = ULLONG_MAX; > adev->smc_rreg = &si_smc_rreg; > adev->smc_wreg = &si_smc_wreg; > adev->pcie_rreg = &si_pcie_rreg; > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c > index bdb5ad9..3685944 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > @@ -44,6 +44,7 @@ > #include "smuio/smuio_9_0_offset.h" > #include "smuio/smuio_9_0_sh_mask.h" > #include "nbio/nbio_7_0_default.h" > +#include "nbio/nbio_7_0_offset.h" > #include "nbio/nbio_7_0_sh_mask.h" > #include "nbio/nbio_7_0_smn.h" > #include "mp/mp_9_0_offset.h" > @@ -64,6 +65,7 @@ > #include "dce_virtual.h" > #include "mxgpu_ai.h" > #include "amdgpu_smu.h" > +#include <uapi/linux/kfd_ioctl.h> > > #define mmMP0_MISC_CGTT_CTRL0 0x01b9 > #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 > @@ -777,8 +779,11 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = > > static int soc15_common_early_init(void *handle) > { > +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > + adev->rmmio_remap.reg_offset = (MMIO_REG_HOLE_OFFSET) >> 2; Why is this shifted? I think this creates some of the confusion above where things have different units. > + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; > adev->smc_rreg = NULL; > adev->smc_wreg = NULL; > adev->pcie_rreg = &soc15_pcie_rreg; > @@ -1007,6 +1012,12 @@ static int soc15_common_hw_init(void *handle) > soc15_program_aspm(adev); > /* setup nbio registers */ > adev->nbio_funcs->init_registers(adev); > + /* remap HDP registers to a hole in mmio space, > + * for the purpose of expose those registers > + * to process space > + */ > + if (adev->nbio_funcs->remap_hdp_registers) > + adev->nbio_funcs->remap_hdp_registers(adev); > /* enable the doorbell aperture */ > soc15_enable_doorbell_aperture(adev, true); > /* HW doorbell routing policy: doorbell writing not > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c > index 5e5b42a..44565b23 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c > @@ -1037,6 +1037,7 @@ static int vi_common_early_init(void *handle) > adev->smc_rreg = &vi_smc_rreg; > adev->smc_wreg = &vi_smc_wreg; > } > + adev->rmmio_remap.bus_addr = ULLONG_MAX; > adev->pcie_rreg = &vi_pcie_rreg; > adev->pcie_wreg = &vi_pcie_wreg; > adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg; > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h > index dc067ed..7524e6e 100644 > --- a/include/uapi/linux/kfd_ioctl.h > +++ b/include/uapi/linux/kfd_ioctl.h > @@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args { > __u32 dmabuf_fd; /* to KFD */ > }; > > +/* Register offset inside the remapped mmio page > + */ > +enum kfd_mmio_remap { > + HDP_MEM_FLUSH_CNTL = 0, This needs some prefix to disambiguate. Something to make it clear that this is not the MMIO register address, but an offset in a remapped address range. This is a bit verbose, but it's the best I can come up with: KFD_MMIO_REMAP_HPD_MEM_FLUSH_CNTL = 0, KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, Regards, Felix > + HDP_REG_FLUSH_CNTL = 4, > +}; > + > #define AMDKFD_IOCTL_BASE 'K' > #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) > #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx