From: Michel Dänzer <michel.daenzer@xxxxxxx> This ensures the GPU sees all previous CPU writes to VRAM, which makes it safe: * For userspace to stream data from CPU to GPU via VRAM instead of GTT * For IBs to be stored in VRAM instead of GTT * For ring buffers to be stored in VRAM instead of GTT, if the HPD flush is performed via MMIO Signed-off-by: Michel Dänzer <michel.daenzer@xxxxxxx> --- drivers/gpu/drm/radeon/cik.c | 4 ---- drivers/gpu/drm/radeon/r100.c | 20 +++++++++++++++----- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_asic.c | 6 ++++-- drivers/gpu/drm/radeon/radeon_asic.h | 3 ++- drivers/gpu/drm/radeon/radeon_drv.c | 4 +++- drivers/gpu/drm/radeon/radeon_ring.c | 10 ++++++++++ 7 files changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 67194a5..a5dfe58 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3666,8 +3666,6 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); radeon_ring_write(ring, fence->seq); radeon_ring_write(ring, 0); - /* HDP flush */ - cik_hdp_flush_cp_ring_emit(rdev, fence->ring); } /** @@ -3696,8 +3694,6 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, upper_32_bits(addr)); radeon_ring_write(ring, fence->seq); radeon_ring_write(ring, 0); - /* HDP flush */ - cik_hdp_flush_cp_ring_emit(rdev, fence->ring); } bool cik_semaphore_ring_emit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 9241b89..e1bed43 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -837,11 +837,7 @@ void r100_fence_ring_emit(struct radeon_device *rdev, /* Wait until IDLE & CLEAN */ radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); - radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(ring, rdev->config.r100.hdp_cntl | - RADEON_HDP_READ_BUFFER_INVALIDATE); - radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(ring, rdev->config.r100.hdp_cntl); + r100_ring_hdp_flush(rdev, ring); /* Emit fence sequence & fire IRQ */ radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); radeon_ring_write(ring, fence->seq); @@ -1060,6 +1056,20 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, (void)RREG32(RADEON_CP_RB_WPTR); } +/** + * r100_ring_hdp_flush - flush Host Data Path via the ring buffer + * rdev: radeon device structure + * ring: ring buffer struct for emitting packets + */ +void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) +{ + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl | + RADEON_HDP_READ_BUFFER_INVALIDATE); + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl); +} + static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4a76e13..bc970b6 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1748,6 +1748,7 @@ struct radeon_asic_ring { /* command emmit functions */ void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); + void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring); bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index ba8caa7..1cb9330 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -185,6 +185,7 @@ static struct radeon_asic_ring r100_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr, + .hdp_flush = &r100_ring_hdp_flush, }; static struct radeon_asic r100_asic = { @@ -331,6 +332,7 @@ static struct radeon_asic_ring r300_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr, + .hdp_flush = &r100_ring_hdp_flush, }; static struct radeon_asic r300_asic = { @@ -1987,7 +1989,7 @@ static struct radeon_asic ci_asic = { .resume = &cik_resume, .asic_reset = &cik_asic_reset, .vga_set_state = &r600_vga_set_state, - .mmio_hdp_flush = NULL, + .mmio_hdp_flush = &r600_mmio_hdp_flush, .gui_idle = &r600_gui_idle, .mc_wait_for_idle = &evergreen_mc_wait_for_idle, .get_xclk = &cik_get_xclk, @@ -2091,7 +2093,7 @@ static struct radeon_asic kv_asic = { .resume = &cik_resume, .asic_reset = &cik_asic_reset, .vga_set_state = &r600_vga_set_state, - .mmio_hdp_flush = NULL, + .mmio_hdp_flush = &r600_mmio_hdp_flush, .gui_idle = &r600_gui_idle, .mc_wait_for_idle = &evergreen_mc_wait_for_idle, .get_xclk = &cik_get_xclk, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b8826c6..3cf6be6 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -148,7 +148,8 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); - +void r100_ring_hdp_flush(struct radeon_device *rdev, + struct radeon_ring *ring); /* * r200,rv250,rs300,rv280 */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index e9e3610..54aa293 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -82,9 +82,11 @@ * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG * 2.39.0 - Add INFO query for number of active CUs + * 2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting + * CS to GPU */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 39 +#define KMS_DRIVER_MINOR 40 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 20b0e4f..ec27e05 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -445,11 +445,21 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig */ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) { + /* If we are emitting the HDP flush via the ring buffer, we need to + * do it before padding. + */ + if (rdev->asic->ring[ring->idx]->hdp_flush) + rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring); /* We pad to match fetch size */ while (ring->wptr & ring->align_mask) { radeon_ring_write(ring, ring->nop); } mb(); + /* If we are emitting the HDP flush via MMIO, we need to do it after + * all CPU writes to VRAM finished. + */ + if (rdev->asic->mmio_hdp_flush) + rdev->asic->mmio_hdp_flush(rdev); radeon_ring_set_wptr(rdev, ring); } -- 2.0.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel