On Tue, Jul 10, 2012 at 8:51 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote: > Before emitting any indirect buffer, emit the offset of the next > valid ring content if any. This allow code that want to resume > ring to resume ring right after ib that caused GPU lockup. > > v2: use scratch registers instead of storing it into memory > Why using scratch register ? To minimize bus activities ? > Signed-off-by: Jerome Glisse <jglisse@xxxxxxxxxx> > Signed-off-by: Christian König <deathsimple@xxxxxxxxxxx> > --- > drivers/gpu/drm/radeon/evergreen.c | 8 +++++++- > drivers/gpu/drm/radeon/ni.c | 11 ++++++++++- > drivers/gpu/drm/radeon/r600.c | 18 ++++++++++++++++-- > drivers/gpu/drm/radeon/radeon.h | 1 + > drivers/gpu/drm/radeon/radeon_ring.c | 4 ++++ > drivers/gpu/drm/radeon/rv770.c | 4 +++- > drivers/gpu/drm/radeon/si.c | 22 +++++++++++++++++++--- > 7 files changed, 60 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c > index f39b900..40de347 100644 > --- a/drivers/gpu/drm/radeon/evergreen.c > +++ b/drivers/gpu/drm/radeon/evergreen.c > @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > /* set to DX10/11 mode */ > radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); > radeon_ring_write(ring, 1); > - /* FIXME: implement */ > + > + if (ring->rptr_save_reg) { > + uint32_t next_rptr = ring->wptr + 2 + 4; > + radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0)); > + radeon_ring_write(ring, next_rptr); > + } > + > radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); > radeon_ring_write(ring, > #ifdef __BIG_ENDIAN > diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c > index f2afefb..6e3d448 100644 > --- a/drivers/gpu/drm/radeon/ni.c > +++ b/drivers/gpu/drm/radeon/ni.c > @@ -855,6 +855,13 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > /* set to DX10/11 mode */ > radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); > radeon_ring_write(ring, 1); > + > + if (ring->rptr_save_reg) { > + uint32_t next_rptr = ring->wptr + 2 + 4; I would rather also skip the surface sync so add another + 8 > + radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0)); > + radeon_ring_write(ring, next_rptr); > + } > + > radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); > radeon_ring_write(ring, > #ifdef __BIG_ENDIAN > @@ -981,8 +988,10 @@ static int cayman_cp_start(struct radeon_device *rdev) > > static void cayman_cp_fini(struct radeon_device *rdev) > { > + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > cayman_cp_enable(rdev, false); > - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); > + radeon_ring_fini(rdev, ring); > + radeon_scratch_free(rdev, ring->rptr_save_reg); > } > > int cayman_cp_resume(struct radeon_device *rdev) > diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c > index c808fa9..74fca15 100644 > --- a/drivers/gpu/drm/radeon/r600.c > +++ b/drivers/gpu/drm/radeon/r600.c > @@ -2155,18 +2155,27 @@ int r600_cp_resume(struct radeon_device *rdev) > void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size) > { > u32 rb_bufsz; > + int r; > > /* Align ring size */ > rb_bufsz = drm_order(ring_size / 8); > ring_size = (1 << (rb_bufsz + 1)) * 4; > ring->ring_size = ring_size; > ring->align_mask = 16 - 1; > + > + r = radeon_scratch_get(rdev, &ring->rptr_save_reg); > + if (r) { > + DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); > + ring->rptr_save_reg = 0; > + } > } > > void r600_cp_fini(struct radeon_device *rdev) > { > + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > r600_cp_stop(rdev); > - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); > + radeon_ring_fini(rdev, ring); > + radeon_scratch_free(rdev, ring->rptr_save_reg); > } > > > @@ -2568,7 +2577,12 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > { > struct radeon_ring *ring = &rdev->ring[ib->ring]; > > - /* FIXME: implement */ > + if (ring->rptr_save_reg) { > + uint32_t next_rptr = ring->wptr + 2 + 4; > + radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0)); > + radeon_ring_write(ring, next_rptr); > + } > + > radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); > radeon_ring_write(ring, > #ifdef __BIG_ENDIAN > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 872270c..64d39ad 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -622,6 +622,7 @@ struct radeon_ring { > unsigned rptr; > unsigned rptr_offs; > unsigned rptr_reg; > + unsigned rptr_save_reg; > unsigned wptr; > unsigned wptr_old; > unsigned wptr_reg; > diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c > index 0873834..ce8eb9d 100644 > --- a/drivers/gpu/drm/radeon/radeon_ring.c > +++ b/drivers/gpu/drm/radeon/radeon_ring.c > @@ -451,6 +451,10 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) > count = (ring->ring_size / 4) - ring->ring_free_dw; > seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg)); > seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg)); > + if (ring->rptr_save_reg) { > + seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg, > + RREG32(ring->rptr_save_reg)); > + } > seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr); > seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr); > seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); > diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c > index b4b1256..eb4704e 100644 > --- a/drivers/gpu/drm/radeon/rv770.c > +++ b/drivers/gpu/drm/radeon/rv770.c > @@ -358,8 +358,10 @@ static int rv770_cp_load_microcode(struct radeon_device *rdev) > > void r700_cp_fini(struct radeon_device *rdev) > { > + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > r700_cp_stop(rdev); > - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); > + radeon_ring_fini(rdev, ring); > + radeon_scratch_free(rdev, ring->rptr_save_reg); > } > > /* > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c > index f61b550..c7ace21 100644 > --- a/drivers/gpu/drm/radeon/si.c > +++ b/drivers/gpu/drm/radeon/si.c > @@ -1765,6 +1765,12 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > struct radeon_ring *ring = &rdev->ring[ib->ring]; > u32 header; > > + if (ring->rptr_save_reg) { > + uint32_t next_rptr = ring->wptr + 2 + 4; Same as ni I would rather also skip the surface sync so add another + 8 > + radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0)); > + radeon_ring_write(ring, next_rptr); > + } > + > if (ib->is_const_ib) > header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); > else > @@ -1917,10 +1923,20 @@ static int si_cp_start(struct radeon_device *rdev) > > static void si_cp_fini(struct radeon_device *rdev) > { > + struct radeon_ring *ring; > si_cp_enable(rdev, false); > - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); > - radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); > - radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); > + > + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > + radeon_ring_fini(rdev, ring); > + radeon_scratch_free(rdev, ring->rptr_save_reg); > + > + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; > + radeon_ring_fini(rdev, ring); > + radeon_scratch_free(rdev, ring->rptr_save_reg); > + > + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; > + radeon_ring_fini(rdev, ring); > + radeon_scratch_free(rdev, ring->rptr_save_reg); > } > > static int si_cp_resume(struct radeon_device *rdev) > -- > 1.7.9.5 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel