Am 09.03.2017 um 04:44 schrieb Alex Deucher: > From: Ken Wang <Qingqing.Wang at amd.com> > > Newer asics use 64 bit wptrs We need a better patch description. Newer ASICs don't merely use 64-bit wptrs, they need them! E.g. if the wptr is now smaller than the rptr, that no longer indicates a wrap-around. > > Signed-off-by: Ken Wang <Qingqing.Wang at amd.com> > Reviewed-by: Alex Deucher <alexander.deucher at amd.com> > Signed-off-by: Alex Deucher <alexander.deucher at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 +++++++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 12 +++++++----- > drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 12 +++++++----- > drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 10 +++++----- > drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 18 ++++++++++-------- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 ++++++++++++---------- > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 11 ++++++----- > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 15 ++++++++------- > drivers/gpu/drm/amd/amdgpu/si_dma.c | 12 +++++++----- > drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 9 +++++---- > drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 9 +++++---- > drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 9 +++++---- > drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 17 +++++++++-------- > drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 23 ++++++++++++----------- > 15 files changed, 109 insertions(+), 85 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index f0e8b2a..a23842d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1594,9 +1594,12 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); > */ > static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) > { > + u32 tmp; Coding style: we need an empty line here, after the declaration.
> if (ring->count_dw <= 0) > DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); > - ring->ring[ring->wptr++] = v; > + tmp = lower_32_bits(ring->wptr++); > + tmp &= ring->buf_mask; > + ring->ring[tmp] = v; I would just code that as "ring->ring[ring->wptr++ & ring->buf_mask]". Using lower_32_bits is mostly for documentation purposes when stitching together hardware packages; that doesn't apply here. > ring->wptr &= ring->ptr_mask; > ring->count_dw--; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > index cead88a..f0515b8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > @@ -126,7 +126,7 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) > > /* We pad to match fetch size */ > count = ring->funcs->align_mask + 1 - > - (ring->wptr & ring->funcs->align_mask); > + (lower_32_bits(ring->wptr) & ring->funcs->align_mask); Ditto, just drop that change. The remaining lower_32_bits() usages look valid to me. > count %= ring->funcs->align_mask + 1; > ring->funcs->insert_nop(ring, count); > > @@ -232,7 +232,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, > } > amdgpu_ring_clear_ring(ring); > } > - ring->ptr_mask = (ring->ring_size / 4) - 1; > + ring->buf_mask = (ring->ring_size / 4) - 1; > + ring->ptr_mask = ring->support_64bit_ptrs ? > + 0xffffffffffffffff : ring->buf_mask; > + > ring->max_dw = max_dw; > > if (amdgpu_debugfs_ring_init(adev, ring)) { > @@ -284,7 +287,8 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, > { > struct amdgpu_ring *ring = file_inode(f)->i_private; > int r, i; > - uint32_t value, result, early[3]; > + uint32_t value, result; > + uint64_t early[3]; Completely superfluous change; we should rather properly mask the values returned to userspace.
> > if (*pos & 3 || size & 3) > return -EINVAL; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index da702dc..9f2d605 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -98,8 +98,8 @@ struct amdgpu_ring_funcs { > u32 nop; > > /* ring read/write ptr handling */ > - u32 (*get_rptr)(struct amdgpu_ring *ring); > - u32 (*get_wptr)(struct amdgpu_ring *ring); > + u64 (*get_rptr)(struct amdgpu_ring *ring); > + u64 (*get_wptr)(struct amdgpu_ring *ring); > void (*set_wptr)(struct amdgpu_ring *ring); > /* validating and patching of IBs */ > int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); > @@ -148,13 +148,15 @@ struct amdgpu_ring { > struct amdgpu_bo *ring_obj; > volatile uint32_t *ring; > unsigned rptr_offs; > - unsigned wptr; > - unsigned wptr_old; > + u64 wptr; > + u64 wptr_old; > + bool support_64bit_ptrs; That belongs in amdgpu_ring_funcs. Regards, Christian. > unsigned ring_size; > unsigned max_dw; > int count_dw; > uint64_t gpu_addr; > - uint32_t ptr_mask; > + uint64_t ptr_mask; > + uint32_t buf_mask; > bool ready; > u32 idx; > u32 me; > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > index c33bc1b..b35ec4a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > @@ -158,7 +158,7 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) > * > * Get the current rptr from the hardware (CIK+). > */ > -static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) > { > u32 rptr; > > @@ -174,7 +174,7 @@ static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) > * > * Get the current wptr from the hardware (CIK+).
> */ > -static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; > @@ -194,7 +194,8 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; > > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], > + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); > } > > static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) > @@ -225,7 +226,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, > u32 extra_bits = vm_id & 0xf; > > /* IB packet must end on a 8 DW boundary */ > - cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8); > + cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); > > amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); > amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ > @@ -432,7 +433,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) > WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); > > ring->wptr = 0; > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); > > /* enable DMA RB */ > WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], > @@ -940,6 +941,7 @@ static int cik_sdma_sw_init(void *handle) > ring = &adev->sdma.instance[i].ring; > ring->ring_obj = NULL; > sprintf(ring->name, "sdma%d", i); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 1024, > &adev->sdma.trap_irq, > (i == 0) ? 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c > index 02ca232..cb3d2816 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c > @@ -2192,12 +2192,12 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) > return 0; > } > > -static u32 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) > +static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) > { > return ring->adev->wb.wb[ring->rptr_offs]; > } > > -static u32 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) > +static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -2215,7 +2215,7 @@ static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > - WREG32(mmCP_RB0_WPTR, ring->wptr); > + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); > (void)RREG32(mmCP_RB0_WPTR); > } > > @@ -2224,10 +2224,10 @@ static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > > if (ring == &adev->gfx.compute_ring[0]) { > - WREG32(mmCP_RB1_WPTR, ring->wptr); > + WREG32(mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); > (void)RREG32(mmCP_RB1_WPTR); > } else if (ring == &adev->gfx.compute_ring[1]) { > - WREG32(mmCP_RB2_WPTR, ring->wptr); > + WREG32(mmCP_RB2_WPTR, lower_32_bits(ring->wptr)); > (void)RREG32(mmCP_RB2_WPTR); > } else { > BUG(); > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > index cd6a6ec..e6a25ee 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > @@ -2629,7 +2629,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) > /* Initialize the ring buffer's read and write pointers */ > WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); > ring->wptr = 0; > - WREG32(mmCP_RB0_WPTR, ring->wptr); > + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); > > /* 
set the wb address wether it's enabled or not */ > rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); > @@ -2658,12 +2658,12 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) > return 0; > } > > -static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) > +static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) > { > return ring->adev->wb.wb[ring->rptr_offs]; > } > > -static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) > +static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -2674,11 +2674,11 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > - WREG32(mmCP_RB0_WPTR, ring->wptr); > + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); > (void)RREG32(mmCP_RB0_WPTR); > } > > -static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) > +static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) > { > /* XXX check if swapping is necessary on BE */ > return ring->adev->wb.wb[ring->wptr_offs]; > @@ -2689,8 +2689,8 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > > /* XXX check if swapping is necessary on BE */ > - adev->wb.wb[ring->wptr_offs] = ring->wptr; > - WDOORBELL32(ring->doorbell_index, ring->wptr); > + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); > + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); > } > > /** > @@ -3160,7 +3160,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) > > /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > ring->wptr = 0; > - mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; > + mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); > WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); > mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); > > @@ -4708,6 +4708,7 @@ static int gfx_v7_0_sw_init(void *handle) > ring = 
&adev->gfx.gfx_ring[i]; > ring->ring_obj = NULL; > sprintf(ring->name, "gfx"); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 1024, > &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); > if (r) > @@ -4732,6 +4733,7 @@ static int gfx_v7_0_sw_init(void *handle) > ring->queue = i % 8; > sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); > irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; > + ring->support_64bit_ptrs = false; > /* type-2 packets are deprecated on MEC, use type-3 instead */ > r = amdgpu_ring_init(adev, ring, 1024, > &adev->gfx.eop_irq, irq_type); > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index af6294f..304f28c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -2125,6 +2125,7 @@ static int gfx_v8_0_sw_init(void *handle) > ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; > } > > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, > AMDGPU_CP_IRQ_GFX_EOP); > if (r) > @@ -2149,6 +2150,7 @@ static int gfx_v8_0_sw_init(void *handle) > ring->queue = i % 8; > sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); > irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; > + ring->support_64bit_ptrs = false; > /* type-2 packets are deprecated on MEC, use type-3 instead */ > r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, > irq_type); > @@ -4490,7 +4492,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) > /* Initialize the ring buffer's read and write pointers */ > WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); > ring->wptr = 0; > - WREG32(mmCP_RB0_WPTR, ring->wptr); > + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); > > /* set the wb address wether it's enabled or not */ > rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); > @@ -5204,7 +5206,7 @@ static int 
gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) > > /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ > ring->wptr = 0; > - mqd->cp_hqd_pq_wptr = ring->wptr; > + mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); > WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); > mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); > > @@ -6458,12 +6460,12 @@ static int gfx_v8_0_set_clockgating_state(void *handle, > return 0; > } > > -static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) > +static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) > { > return ring->adev->wb.wb[ring->rptr_offs]; > } > > -static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) > +static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -6480,10 +6482,10 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) > > if (ring->use_doorbell) { > /* XXX check if swapping is necessary on BE */ > - adev->wb.wb[ring->wptr_offs] = ring->wptr; > - WDOORBELL32(ring->doorbell_index, ring->wptr); > + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); > + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); > } else { > - WREG32(mmCP_RB0_WPTR, ring->wptr); > + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); > (void)RREG32(mmCP_RB0_WPTR); > } > } > @@ -6671,7 +6673,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, > } > } > > -static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) > +static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) > { > return ring->adev->wb.wb[ring->wptr_offs]; > } > @@ -6681,8 +6683,8 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > > /* XXX check if swapping is necessary on BE */ > - adev->wb.wb[ring->wptr_offs] = ring->wptr; > - WDOORBELL32(ring->doorbell_index, ring->wptr); > + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); > + 
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); > } > > static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > index a881cf4..f5ee5f3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > @@ -186,7 +186,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) > * > * Get the current rptr from the hardware (VI+). > */ > -static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) > { > /* XXX check if swapping is necessary on BE */ > return ring->adev->wb.wb[ring->rptr_offs] >> 2; > @@ -199,7 +199,7 @@ static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) > * > * Get the current wptr from the hardware (VI+). > */ > -static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; > @@ -220,7 +220,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; > > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); > } > > static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) > @@ -251,7 +251,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, > u32 vmid = vm_id & 0xf; > > /* IB packet must end on a 8 DW boundary */ > - sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); > + sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); > > amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | > SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); > @@ -466,7 +466,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) > WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); > > ring->wptr = 0; > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); > > /* enable DMA RB */ > rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); > @@ -946,6 +946,7 @@ static int sdma_v2_4_sw_init(void *handle) > ring->ring_obj = NULL; > ring->use_doorbell = false; > sprintf(ring->name, "sdma%d", i); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 1024, > &adev->sdma.trap_irq, > (i == 0) ? > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > index 1df5b34..e715385 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > @@ -337,7 +337,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) > * > * Get the current rptr from the hardware (VI+). 
> */ > -static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) > { > /* XXX check if swapping is necessary on BE */ > return ring->adev->wb.wb[ring->rptr_offs] >> 2; > @@ -350,7 +350,7 @@ static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) > * > * Get the current wptr from the hardware (VI+). > */ > -static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > u32 wptr; > @@ -380,12 +380,12 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) > > if (ring->use_doorbell) { > /* XXX check if swapping is necessary on BE */ > - adev->wb.wb[ring->wptr_offs] = ring->wptr << 2; > - WDOORBELL32(ring->doorbell_index, ring->wptr << 2); > + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr) << 2; > + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); > } else { > int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; > > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); > } > } > > @@ -417,7 +417,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, > u32 vmid = vm_id & 0xf; > > /* IB packet must end on a 8 DW boundary */ > - sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); > + sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); > > amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | > SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); > @@ -660,7 +660,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) > WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); > > ring->wptr = 0; > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); > > doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); > > @@ -1165,6 +1165,7 @@ static int sdma_v3_0_sw_init(void *handle) > AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; > > sprintf(ring->name, "sdma%d", i); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 1024, > &adev->sdma.trap_irq, > (i == 0) ? 
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c > index 3372a07..256a77b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c > +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c > @@ -37,12 +37,12 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); > static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); > static void si_dma_set_irq_funcs(struct amdgpu_device *adev); > > -static uint32_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) > { > return ring->adev->wb.wb[ring->rptr_offs>>2]; > } > > -static uint32_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; > @@ -55,7 +55,8 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; > > - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); > + WREG32(DMA_RB_WPTR + sdma_offsets[me], > + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); > } > > static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, > @@ -65,7 +66,7 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, > /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. > * Pad as necessary with NOPs. 
> */ > - while ((ring->wptr & 7) != 5) > + while ((lower_32_bits(ring->wptr) & 7) != 5) > amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); > amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); > amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); > @@ -184,7 +185,7 @@ static int si_dma_start(struct amdgpu_device *adev) > WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); > > ring->wptr = 0; > - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); > + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); > WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); > > ring->ready = true; > @@ -530,6 +531,7 @@ static int si_dma_sw_init(void *handle) > ring->ring_obj = NULL; > ring->use_doorbell = false; > sprintf(ring->name, "sdma%d", i); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 1024, > &adev->sdma.trap_irq, > (i == 0) ? > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c > index b34cefc..580c658 100644 > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c > @@ -55,7 +55,7 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, > * > * Returns the current hardware read pointer > */ > -static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -69,7 +69,7 @@ static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) > * > * Returns the current hardware write pointer > */ > -static uint32_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -87,7 +87,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); > + WREG32(mmUVD_RBC_RB_WPTR, 
lower_32_bits(ring->wptr)); > } > > static int uvd_v4_2_early_init(void *handle) > @@ -121,6 +121,7 @@ static int uvd_v4_2_sw_init(void *handle) > > ring = &adev->uvd.ring; > sprintf(ring->name, "uvd"); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); > > return r; > @@ -367,7 +368,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) > WREG32(mmUVD_RBC_RB_RPTR, 0x0); > > ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); > - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); > + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); > > /* set the ring address */ > WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr); > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c > index ad8c02e..7a4fca0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c > @@ -51,7 +51,7 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, > * > * Returns the current hardware read pointer > */ > -static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -65,7 +65,7 @@ static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) > * > * Returns the current hardware write pointer > */ > -static uint32_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -83,7 +83,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); > + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); > } > > static int uvd_v5_0_early_init(void *handle) > @@ -117,6 +117,7 @@ static int uvd_v5_0_sw_init(void *handle) > > ring = &adev->uvd.ring; > sprintf(ring->name, "uvd"); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 512, 
&adev->uvd.irq, 0); > > return r; > @@ -424,7 +425,7 @@ static int uvd_v5_0_start(struct amdgpu_device *adev) > WREG32(mmUVD_RBC_RB_RPTR, 0); > > ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); > - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); > + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); > > WREG32_P(mmUVD_RBC_RB_CNTL, 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); > > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c > index 18a6de4..2c5482e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c > @@ -54,7 +54,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, > * > * Returns the current hardware read pointer > */ > -static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -68,7 +68,7 @@ static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) > * > * Returns the current hardware write pointer > */ > -static uint32_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -86,7 +86,7 @@ static void uvd_v6_0_ring_set_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); > + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); > } > > static int uvd_v6_0_early_init(void *handle) > @@ -120,6 +120,7 @@ static int uvd_v6_0_sw_init(void *handle) > > ring = &adev->uvd.ring; > sprintf(ring->name, "uvd"); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); > > return r; > @@ -521,7 +522,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) > WREG32(mmUVD_RBC_RB_RPTR, 0); > > ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); > - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); > + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); > > 
WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); > > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c > index cb0b730f..9a4aa5e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c > @@ -52,7 +52,7 @@ static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev); > * > * Returns the current hardware read pointer > */ > -static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -69,7 +69,7 @@ static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) > * > * Returns the current hardware write pointer > */ > -static uint32_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -91,9 +91,9 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > > if (ring == &adev->vce.ring[0]) > - WREG32(mmVCE_RB_WPTR, ring->wptr); > + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); > else > - WREG32(mmVCE_RB_WPTR2, ring->wptr); > + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); > } > > static int vce_v2_0_lmi_clean(struct amdgpu_device *adev) > @@ -241,15 +241,15 @@ static int vce_v2_0_start(struct amdgpu_device *adev) > vce_v2_0_mc_resume(adev); > > ring = &adev->vce.ring[0]; > - WREG32(mmVCE_RB_RPTR, ring->wptr); > - WREG32(mmVCE_RB_WPTR, ring->wptr); > + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); > + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); > WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); > WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); > WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); > > ring = &adev->vce.ring[1]; > - WREG32(mmVCE_RB_RPTR2, ring->wptr); > - WREG32(mmVCE_RB_WPTR2, ring->wptr); > + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); > + 
WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); > WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); > WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); > WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); > @@ -446,6 +446,7 @@ static int vce_v2_0_sw_init(void *handle) > for (i = 0; i < adev->vce.num_rings; i++) { > ring = &adev->vce.ring[i]; > sprintf(ring->name, "vce%d", i); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 512, > &adev->vce.irq, 0); > if (r) > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c > index 93ec881..7877a51 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c > @@ -73,7 +73,7 @@ static int vce_v3_0_wait_for_idle(void *handle); > * > * Returns the current hardware read pointer > */ > -static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) > +static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -92,7 +92,7 @@ static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) > * > * Returns the current hardware write pointer > */ > -static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) > +static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) > { > struct amdgpu_device *adev = ring->adev; > > @@ -116,11 +116,11 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) > struct amdgpu_device *adev = ring->adev; > > if (ring == &adev->vce.ring[0]) > - WREG32(mmVCE_RB_WPTR, ring->wptr); > + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); > else if (ring == &adev->vce.ring[1]) > - WREG32(mmVCE_RB_WPTR2, ring->wptr); > + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); > else > - WREG32(mmVCE_RB_WPTR3, ring->wptr); > + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); > } > > static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) > @@ -231,22 +231,22 @@ static int vce_v3_0_start(struct amdgpu_device 
*adev) > int idx, r; > > ring = &adev->vce.ring[0]; > - WREG32(mmVCE_RB_RPTR, ring->wptr); > - WREG32(mmVCE_RB_WPTR, ring->wptr); > + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); > + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); > WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); > WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); > WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); > > ring = &adev->vce.ring[1]; > - WREG32(mmVCE_RB_RPTR2, ring->wptr); > - WREG32(mmVCE_RB_WPTR2, ring->wptr); > + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); > + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); > WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); > WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); > WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); > > ring = &adev->vce.ring[2]; > - WREG32(mmVCE_RB_RPTR3, ring->wptr); > - WREG32(mmVCE_RB_WPTR3, ring->wptr); > + WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); > + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); > WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); > WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); > WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); > @@ -403,6 +403,7 @@ static int vce_v3_0_sw_init(void *handle) > for (i = 0; i < adev->vce.num_rings; i++) { > ring = &adev->vce.ring[i]; > sprintf(ring->name, "vce%d", i); > + ring->support_64bit_ptrs = false; > r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); > if (r) > return r;