Sorry for the confusion. I misread the code, but it still seems to me to be a valid issue. What the patch tries to do is to fix the following pattern: - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); I agree with you that ring->wptr is an offset to the ring. Just looking at the above lines it seems that they are incorrect when ring->wptr is larger than 1GB. Since, as you pointed out, ring->wptr cannot be larger than (1 << 24), this can be resolved via either (1) fixing the patterns in the provided patch, or (2) clamping the results to (1 << 24) - 1 and getting rid of lower_32_bits() / upper_32_bits() altogether. What's your suggestion for moving forward? Thanks, Haohui On Mon, Apr 25, 2022 at 7:02 PM Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote: > > Am 25.04.22 um 11:15 schrieb Haohui Mai: > > Computing the address of the doorbell should be done before instead of after > > separating the 64-bit address into the higher and lower half. The > > current code sets the MMIO registers incorrectly if the address of the > > doorbell is above 1G. > > That doesn't make any sense at all. The address of the doorbell is > completely irrelevant to the value you write into it. > > What we could do is to stop using the lower_32_bits() function, since > the WPTR can't handle more than 16, 20 or 24 bits (IIRC) depending on hw > generation anyway. > > Regards, > Christian. 
> > > > > Signed-off-by: Haohui Mai <ricetons@xxxxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++---- > > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++---- > > drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++---- > > drivers/gpu/drm/amd/amdgpu/si_dma.c | 4 ++-- > > 7 files changed, 20 insertions(+), 20 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > index c8ebd108548d..df863d346995 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) > > struct amdgpu_device *adev = ring->adev; > > > > WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], > > - (lower_32_bits(ring->wptr) << 2) & 0x3fffc); > > + (lower_32_bits(ring->wptr << 2)) & 0x3fffc); > > } > > > > static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) > > @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) > > WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); > > > > ring->wptr = 0; > > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); > > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2)); > > > > /* enable DMA RB */ > > WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > > index 1d8bbcbd7a37..b83fd00466fe 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > > @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct > > amdgpu_ring *ring) > > { > > struct amdgpu_device *adev = ring->adev; > > > > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], 
> > lower_32_bits(ring->wptr) << 2); > > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], > > lower_32_bits(ring->wptr << 2)); > > } > > > > static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) > > @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) > > WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); > > > > ring->wptr = 0; > > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); > > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2)); > > > > /* enable DMA RB */ > > rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > > index 4ef4feff5649..557a7d5174b0 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > > @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct > > amdgpu_ring *ring) > > if (ring->use_doorbell) { > > u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; > > /* XXX check if swapping is necessary on BE */ > > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > > - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); > > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2))); > > + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2)); > > } else if (ring->use_pollmem) { > > u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; > > > > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); > > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2))); > > } else { > > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], > > lower_32_bits(ring->wptr) << 2); > > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], > > lower_32_bits(ring->wptr << 2)); > > } > > } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > > index d7e8f7232364..ff86c43b63d1 100644 > > --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > > @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct > > amdgpu_ring *ring) > > > > DRM_DEBUG("Using doorbell -- " > > "wptr_offs == 0x%08x " > > - "lower_32_bits(ring->wptr) << 2 == 0x%08x " > > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", > > + "lower_32_bits(ring->wptr << 2) == 0x%08x " > > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", > > ring->wptr_offs, > > lower_32_bits(ring->wptr << 2), > > upper_32_bits(ring->wptr << 2)); > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > > index a8d49c005f73..627eb1f147c2 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > > @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct > > amdgpu_ring *ring) > > if (ring->use_doorbell) { > > DRM_DEBUG("Using doorbell -- " > > "wptr_offs == 0x%08x " > > - "lower_32_bits(ring->wptr) << 2 == 0x%08x " > > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", > > + "lower_32_bits(ring->wptr << 2) == 0x%08x " > > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", > > ring->wptr_offs, > > lower_32_bits(ring->wptr << 2), > > upper_32_bits(ring->wptr << 2)); > > @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) > > > > if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write > > for wptr */ > > WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), > > - lower_32_bits(ring->wptr) << 2); > > + lower_32_bits(ring->wptr << 2)); > > WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), > > - upper_32_bits(ring->wptr) << 2); > > + upper_32_bits(ring->wptr << 2)); > > } > > > > doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, > > mmSDMA0_GFX_DOORBELL)); > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > index 824eace69884..a5eb82bfeaa8 100644 > > --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct > > amdgpu_ring *ring) > > if (ring->use_doorbell) { > > DRM_DEBUG("Using doorbell -- " > > "wptr_offs == 0x%08x " > > - "lower_32_bits(ring->wptr) << 2 == 0x%08x " > > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", > > + "lower_32_bits(ring->wptr << 2) == 0x%08x " > > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", > > ring->wptr_offs, > > lower_32_bits(ring->wptr << 2), > > upper_32_bits(ring->wptr << 2)); > > @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) > > WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, > > mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); > > > > if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write > > for wptr */ > > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), > > lower_32_bits(ring->wptr) << 2); > > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), > > upper_32_bits(ring->wptr) << 2); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), > > lower_32_bits(ring->wptr << 2)); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), > > upper_32_bits(ring->wptr << 2)); > > } > > > > doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, > > mmSDMA0_GFX_DOORBELL)); > > diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c > > b/drivers/gpu/drm/amd/amdgpu/si_dma.c > > index 195b45bcb8ad..0af11d3b00e7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c > > +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c > > @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) > > u32 me = (ring == &adev->sdma.instance[0].ring) ? 
0 : 1; > > > > WREG32(DMA_RB_WPTR + sdma_offsets[me], > > - (lower_32_bits(ring->wptr) << 2) & 0x3fffc); > > + (lower_32_bits(ring->wptr << 2)) & 0x3fffc); > > } > > > > static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, > > @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev) > > WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); > > > > ring->wptr = 0; > > - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); > > + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2)); > > WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); > > > > ring->sched.ready = true; > > -- > > 2.25.1 >