Move the CSA area to the top of the VA space to avoid clashing with
HMM/ATC in the lower range on GFX9.

v2: wrong sign noticed by Roger, rebase on CSA_VADDR cleanup,
    handle VA hole on GFX9 as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 24 ++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  5 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    |  6 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    |  4 ++--
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e7dfb7b44b4b..b832651d2137 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -24,6 +24,18 @@
 #include "amdgpu.h"
 
 #define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
 
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
+{
+	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+
+	addr -= AMDGPU_VA_RESERVED_SIZE;
+
+	if (addr >= AMDGPU_VA_HOLE_START)
+		addr |= AMDGPU_VA_HOLE_END;
+
+	return addr;
+}
+
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
 {
 	/* By now all MMIO pages except mailbox are blocked */
@@ -55,14 +67,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
 
 /*
  * amdgpu_map_static_csa should be called during amdgpu_vm_init
- * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE"
- * to this VM, and each command submission of GFX should use this virtual
- * address within META_DATA init package to support SRIOV gfx preemption.
+ * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
+ * submission of GFX should use this virtual address within META_DATA init
+ * package to support SRIOV gfx preemption.
  */
-
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			  struct amdgpu_bo_va **bo_va)
 {
+	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
 	struct ww_acquire_ctx ticket;
 	struct list_head list;
 	struct amdgpu_bo_list_entry pd;
@@ -90,7 +102,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return -ENOMEM;
 	}
 
-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
+	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
 				AMDGPU_CSA_SIZE);
 	if (r) {
 		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
@@ -99,7 +111,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return r;
 	}
 
-	r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
+	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 			     AMDGPU_PTE_EXECUTABLE);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 6a83425aa9ed..880ac113a3a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -251,8 +251,7 @@ struct amdgpu_virt {
 	uint32_t gim_feature;
 };
 
-#define AMDGPU_CSA_SIZE		(8 * 1024)
-#define AMDGPU_CSA_VADDR	(AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE)
+#define AMDGPU_CSA_SIZE		(8 * 1024)
 
 #define amdgpu_sriov_enabled(adev) \
 ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void)
 }
 
 struct amdgpu_vm;
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 530d2ea1b73c..6482efec47a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7147,11 +7147,11 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 	} ce_payload = {};
 
 	if (ring->adev->virt.chained_ib_support) {
-		ce_payload_addr = AMDGPU_CSA_VADDR +
+		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
 			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
 	} else {
-		ce_payload_addr = AMDGPU_CSA_VADDR +
+		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
 			offsetof(struct vi_gfx_meta_data, ce_payload);
 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
 	}
@@ -7175,7 +7175,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 		struct vi_de_ib_state_chained_ib chained;
 	} de_payload = {};
 
-	csa_addr = AMDGPU_CSA_VADDR;
+	csa_addr = amdgpu_csa_vaddr(ring->adev);
 	gds_addr = csa_addr + 4096;
 	if (ring->adev->virt.chained_ib_support) {
 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 720d15f1324e..1363e7ddd04a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3873,7 +3873,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 	int cnt;
 
 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-	csa_addr = AMDGPU_CSA_VADDR;
+	csa_addr = amdgpu_csa_vaddr(ring->adev);
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -3891,7 +3891,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	uint64_t csa_addr, gds_addr;
 	int cnt;
 
-	csa_addr = AMDGPU_CSA_VADDR;
+	csa_addr = amdgpu_csa_vaddr(ring->adev);
 	gds_addr = csa_addr + 4096;
 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
-- 
2.14.1
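
A note on the address arithmetic, since the OR in amdgpu_csa_vaddr() is easy
to misread: on GFX9 the VA range between AMDGPU_VA_HOLE_START and
AMDGPU_VA_HOLE_END is not addressable, so an address computed at the top of
the 48-bit space has to be sign-extended into the canonical upper range, and
amdgpu_map_static_csa() strips that extension again with AMDGPU_VA_HOLE_MASK
before feeding the page tables. The standalone user-space sketch below walks
through that arithmetic; the constant values and the reserved-area size are
illustrative stand-ins for the kernel's definitions, not copies of them.

/* csa_vaddr_demo.c - standalone sketch of the VA hole handling above */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Illustrative GFX9-style 48-bit canonical layout, not the kernel headers */
#define VA_HOLE_START     0x0000800000000000ULL  /* first non-canonical VA    */
#define VA_HOLE_END       0xffff800000000000ULL  /* start of the upper range  */
#define VA_HOLE_MASK      0x0000ffffffffffffULL  /* strips the sign extension */
#define VA_RESERVED_SIZE  (1ULL << 20)           /* assumed reserved area     */

/* mirrors the logic of amdgpu_csa_vaddr() from the patch */
static uint64_t csa_vaddr(uint64_t max_va)
{
	uint64_t addr = max_va - VA_RESERVED_SIZE;

	/* anything at or above the hole must live in the canonical upper
	 * range; OR-ing in VA_HOLE_END sets the sign-extension bits */
	if (addr >= VA_HOLE_START)
		addr |= VA_HOLE_END;

	return addr;
}

int main(void)
{
	uint64_t addr = csa_vaddr(1ULL << 48);  /* 48-bit VA space */

	printf("CSA VA: %016" PRIx64 "\n", addr);                /* fffffffffff00000 */
	printf("masked: %016" PRIx64 "\n", addr & VA_HOLE_MASK); /* 0000fffffff00000 */
	return 0;
}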