From: Ken Wang <Qingqing.Wang@xxxxxxx> Newer asics use 64 bit wptrs Signed-off-by: Ken Wang <Qingqing.Wang at amd.com> Reviewed-by: Alex Deucher <alexander.deucher at amd.com> Signed-off-by: Alex Deucher <alexander.deucher at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 +++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 12 +++++++----- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 12 +++++++----- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 18 ++++++++++-------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 ++++++++++++---------- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 11 ++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 15 ++++++++------- drivers/gpu/drm/amd/amdgpu/si_dma.c | 12 +++++++----- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 17 +++++++++-------- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 23 ++++++++++++----------- 15 files changed, 109 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f0e8b2a..a23842d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1594,9 +1594,12 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); */ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) { + u32 tmp; if (ring->count_dw <= 0) DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - ring->ring[ring->wptr++] = v; + tmp = lower_32_bits(ring->wptr++); + tmp &= ring->buf_mask; + ring->ring[tmp] = v; ring->wptr &= ring->ptr_mask; ring->count_dw--; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index cead88a..f0515b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -126,7 +126,7 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /* We pad to match fetch size */ count = ring->funcs->align_mask + 1 - - (ring->wptr & ring->funcs->align_mask); + (lower_32_bits(ring->wptr) & ring->funcs->align_mask); count %= ring->funcs->align_mask + 1; ring->funcs->insert_nop(ring, count); @@ -232,7 +232,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } amdgpu_ring_clear_ring(ring); } - ring->ptr_mask = (ring->ring_size / 4) - 1; + ring->buf_mask = (ring->ring_size / 4) - 1; + ring->ptr_mask = ring->support_64bit_ptrs ? + 0xffffffffffffffff : ring->buf_mask; + ring->max_dw = max_dw; if (amdgpu_debugfs_ring_init(adev, ring)) { @@ -284,7 +287,8 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; int r, i; - uint32_t value, result, early[3]; + uint32_t value, result; + uint64_t early[3]; if (*pos & 3 || size & 3) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index da702dc..9f2d605 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -98,8 +98,8 @@ struct amdgpu_ring_funcs { u32 nop; /* ring read/write ptr handling */ - u32 (*get_rptr)(struct amdgpu_ring *ring); - u32 (*get_wptr)(struct amdgpu_ring *ring); + u64 (*get_rptr)(struct amdgpu_ring *ring); + u64 (*get_wptr)(struct amdgpu_ring *ring); void (*set_wptr)(struct amdgpu_ring *ring); /* validating and patching of IBs */ int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); @@ -148,13 +148,15 @@ struct amdgpu_ring { struct amdgpu_bo *ring_obj; volatile uint32_t *ring; unsigned rptr_offs; - unsigned wptr; - unsigned wptr_old; + u64 wptr; + u64 wptr_old; + bool support_64bit_ptrs; unsigned ring_size; unsigned max_dw; int count_dw; uint64_t gpu_addr; - uint32_t ptr_mask; + uint64_t ptr_mask; + uint32_t buf_mask; bool ready; u32 idx; u32 me; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c33bc1b..b35ec4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -158,7 +158,7 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) * * Get the current rptr from the hardware (CIK+). */ -static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) { u32 rptr; @@ -174,7 +174,7 @@ static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (CIK+). */ -static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; @@ -194,7 +194,8 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -225,7 +226,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, u32 extra_bits = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8); + cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ @@ -432,7 +433,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); /* enable DMA RB */ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], @@ -940,6 +941,7 @@ static int cik_sdma_sw_init(void *handle) ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; sprintf(ring->name, "sdma%d", i); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 02ca232..cb3d2816 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2192,12 +2192,12 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) return 0; } -static u32 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) +static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2215,7 +2215,7 @@ static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } @@ -2224,10 +2224,10 @@ static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->gfx.compute_ring[0]) { - WREG32(mmCP_RB1_WPTR, ring->wptr); + WREG32(mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB1_WPTR); } else if (ring == &adev->gfx.compute_ring[1]) { - WREG32(mmCP_RB2_WPTR, ring->wptr); + WREG32(mmCP_RB2_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB2_WPTR); } else { BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index cd6a6ec..e6a25ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2629,7 +2629,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); @@ -2658,12 +2658,12 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) return 0; } -static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2674,11 +2674,11 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } -static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->wptr_offs]; @@ -2689,8 +2689,8 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } /** @@ -3160,7 +3160,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; + mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); @@ -4708,6 +4708,7 @@ static int gfx_v7_0_sw_init(void *handle) ring = &adev->gfx.gfx_ring[i]; ring->ring_obj = NULL; sprintf(ring->name, "gfx"); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); if (r) @@ -4732,6 +4733,7 @@ static int gfx_v7_0_sw_init(void *handle) ring->queue = i % 8; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; + ring->support_64bit_ptrs = false; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index af6294f..304f28c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2125,6 +2125,7 @@ static int gfx_v8_0_sw_init(void *handle) ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; } + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); if (r) @@ -2149,6 +2150,7 @@ static int gfx_v8_0_sw_init(void *handle) ring->queue = i % 8; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; + ring->support_64bit_ptrs = false; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type); @@ -4490,7 +4492,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); @@ -5204,7 +5206,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->cp_hqd_pq_wptr = ring->wptr; + mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); @@ -6458,12 +6460,12 @@ static int gfx_v8_0_set_clockgating_state(void *handle, return 0; } -static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -6480,10 +6482,10 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } } @@ -6671,7 +6673,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, } } -static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->wptr_offs]; } @@ -6681,8 +6683,8 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index a881cf4..f5ee5f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -186,7 +186,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) * * Get the current rptr from the hardware (VI+). */ -static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->rptr_offs] >> 2; @@ -199,7 +199,7 @@ static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (VI+). */ -static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; @@ -220,7 +220,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); } static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -251,7 +251,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, u32 vmid = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -466,7 +466,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); @@ -946,6 +946,7 @@ static int sdma_v2_4_sw_init(void *handle) ring->ring_obj = NULL; ring->use_doorbell = false; sprintf(ring->name, "sdma%d", i); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 1df5b34..e715385 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -337,7 +337,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) * * Get the current rptr from the hardware (VI+). */ -static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->rptr_offs] >> 2; @@ -350,7 +350,7 @@ static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (VI+). */ -static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 wptr; @@ -380,12 +380,12 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr << 2; - WDOORBELL32(ring->doorbell_index, ring->wptr << 2); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr) << 2; + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); } } @@ -417,7 +417,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, u32 vmid = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -660,7 +660,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); @@ -1165,6 +1165,7 @@ static int sdma_v3_0_sw_init(void *handle) AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; sprintf(ring->name, "sdma%d", i); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 3372a07..256a77b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -37,12 +37,12 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); -static uint32_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs>>2]; } -static uint32_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; @@ -55,7 +55,8 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(DMA_RB_WPTR + sdma_offsets[me], + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -65,7 +66,7 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ - while ((ring->wptr & 7) != 5) + while ((lower_32_bits(ring->wptr) & 7) != 5) amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); @@ -184,7 +185,7 @@ static int si_dma_start(struct amdgpu_device *adev) WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); ring->ready = true; @@ -530,6 +531,7 @@ static int si_dma_sw_init(void *handle) ring->ring_obj = NULL; ring->use_doorbell = false; sprintf(ring->name, "sdma%d", i); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index b34cefc..580c658 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -55,7 +55,7 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -69,7 +69,7 @@ static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -87,7 +87,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v4_2_early_init(void *handle) @@ -121,6 +121,7 @@ static int uvd_v4_2_sw_init(void *handle) ring = &adev->uvd.ring; sprintf(ring->name, "uvd"); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); return r; @@ -367,7 +368,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0x0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); /* set the ring address */ WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ad8c02e..7a4fca0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -51,7 +51,7 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -65,7 +65,7 @@ static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -83,7 +83,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v5_0_early_init(void *handle) @@ -117,6 +117,7 @@ static int uvd_v5_0_sw_init(void *handle) ring = &adev->uvd.ring; sprintf(ring->name, "uvd"); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); return r; @@ -424,7 +425,7 @@ static int uvd_v5_0_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_P(mmUVD_RBC_RB_CNTL, 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 18a6de4..2c5482e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -54,7 +54,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -68,7 +68,7 @@ static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -86,7 +86,7 @@ static void uvd_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v6_0_early_init(void *handle) @@ -120,6 +120,7 @@ static int uvd_v6_0_sw_init(void *handle) ring = &adev->uvd.ring; sprintf(ring->name, "uvd"); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); return r; @@ -521,7 +522,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index cb0b730f..9a4aa5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -52,7 +52,7 @@ static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev); * * Returns the current hardware read pointer */ -static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -69,7 +69,7 @@ static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -91,9 +91,9 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->vce.ring[0]) - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); } static int vce_v2_0_lmi_clean(struct amdgpu_device *adev) @@ -241,15 +241,15 @@ static int vce_v2_0_start(struct amdgpu_device *adev) vce_v2_0_mc_resume(adev); ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, ring->wptr); - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, ring->wptr); - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); @@ -446,6 +446,7 @@ static int vce_v2_0_sw_init(void *handle) for (i = 0; i < adev->vce.num_rings; i++) { ring = &adev->vce.ring[i]; sprintf(ring->name, "vce%d", i); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 93ec881..7877a51 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -73,7 +73,7 @@ static int vce_v3_0_wait_for_idle(void *handle); * * Returns the current hardware read pointer */ -static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -92,7 +92,7 @@ static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -116,11 +116,11 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->vce.ring[0]) - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else if (ring == &adev->vce.ring[1]) - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); else - WREG32(mmVCE_RB_WPTR3, ring->wptr); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); } static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) @@ -231,22 +231,22 @@ static int vce_v3_0_start(struct amdgpu_device *adev) int idx, r; ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, ring->wptr); - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, ring->wptr); - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); ring = &adev->vce.ring[2]; - WREG32(mmVCE_RB_RPTR3, ring->wptr); - WREG32(mmVCE_RB_WPTR3, ring->wptr); + WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); @@ -403,6 +403,7 @@ static int vce_v3_0_sw_init(void *handle) for (i = 0; i < adev->vce.num_rings; i++) { ring = &adev->vce.ring[i]; sprintf(ring->name, "vce%d", i); + ring->support_64bit_ptrs = false; r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); if (r) return r; -- 2.5.5