Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL to an empty page in mmio space. We will later map this page to process space so application can flush hdp. This can't be done properly at those registers' original location because it will expose more than desired registers to process space. v2: Use explicit register hole location v3: Moved remapped hdp registers into adev struct v4: Use more generic name for remapped page Expose register offset in kfd_ioctl.h Change-Id: Ia8d27c0c9a082711d16bbf55602bf5712a47b6d6 Signed-off-by: Oak Zeng <Oak.Zeng@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++++++ drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/soc15.c | 18 ++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 7 +++++++ 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bc96ec4..d71aa6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -642,6 +642,11 @@ struct nbio_hdp_flush_reg { u32 ref_and_mask_sdma1; }; +struct amdgpu_mmio_remap { + u32 reg_offset; + resource_size_t bus_addr; +}; + struct amdgpu_nbio_funcs { const struct nbio_hdp_flush_reg *hdp_flush_reg; u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); @@ -767,6 +772,7 @@ struct amdgpu_device { void __iomem *rmmio; /* protects concurrent MM_INDEX/DATA based register access */ spinlock_t mmio_idx_lock; + struct amdgpu_mmio_remap rmmio_remap; /* protects concurrent SMC based register access */ spinlock_t smc_idx_lock; amdgpu_rreg_t smc_rreg; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1cdb98a..afd1586 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -29,6 +29,7 @@ #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "vega10_enum.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c @@ -55,10 +56,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); } static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c69d515..c211b1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -27,6 +27,7 @@ #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c @@ -53,10 +54,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0); } static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index bdb5ad9..291290d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -44,6 +44,7 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "mp/mp_9_0_offset.h" @@ -64,6 +65,7 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" +#include <uapi/linux/kfd_ioctl.h> #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 @@ -775,6 +777,21 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .need_reset_on_init = &soc15_need_reset_on_init, }; +static void soc15_remap_hdp_coherency_registers(struct amdgpu_device *adev) +{ +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + /* Remap hdp coherency registers to a hole in register space, + * for the purpose of mapping them to process space(so process + * can flush hdp) + */ + adev->rmmio_remap.reg_offset = (MMIO_REG_HOLE_OFFSET) >> 2; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL); +} + static int soc15_common_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -794,6 +811,7 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = 0xFF; + soc15_remap_hdp_coherency_registers(adev); switch (adev->asic_type) { case CHIP_VEGA10: adev->asic_funcs = &soc15_asic_funcs; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index dc067ed..7524e6e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* Register offset inside the remapped mmio page + */ +enum kfd_mmio_remap { + HDP_MEM_FLUSH_CNTL = 0, + HDP_REG_FLUSH_CNTL = 4, +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx