From: Christian Koenig <christian.koenig@xxxxxxx> The shader preemption on cayman doesn't work correctly with multiple rings. So to be able to still make use of the compute rings we use a semaphore to make sure that only one IB can execute at the same time. This isn't as effective as shader preemption, but also isn't as bad as putting everything on the GFX ring. Signed-off-by: Christian Koenig <christian.koenig@xxxxxxx> --- drivers/gpu/drm/radeon/ni.c | 142 ++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon.h | 2 + drivers/gpu/drm/radeon/radeon_cs.c | 2 +- 3 files changed, 139 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9d9f5ac..6a3e8a8 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1125,13 +1125,75 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); } +/* The shader preemption on cayman doesn't work + * correctly with multiple rings. So to be able to + * still make use of the compute rings we use a + * semaphore to make sure that only one IB can execute + * at the same time + */ +static void cayman_cp_ring_create_workaround(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + int r; + + r = radeon_semaphore_create(rdev, &rdev->cayman_ring_lock); + if (r) { + dev_err(rdev->dev, "Can't allocate " + "cayman_ring_lock (%d)!\n", r); + return; + } + + r = radeon_ring_alloc(rdev, ring, 8); + if (r) { + dev_err(rdev->dev, "Can't initialize " + "cayman_ring_lock (%d)!\n", r); + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); + return; + } + + radeon_semaphore_emit_signal(rdev, RADEON_RING_TYPE_GFX_INDEX, + rdev->cayman_ring_lock); + + radeon_ring_commit(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); +} + +static void cayman_cp_ring_cleanup_workaround(struct radeon_device *rdev) +{ + struct radeon_fence *fence; + int r; + + r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); + if (r) { + dev_err(rdev->dev, "Can't cleanup " + "cayman_ring_lock (%d)!\n", r); + return; + } + + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, fence); + radeon_fence_unref(&fence); +} + void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + if (ib->ring != RADEON_RING_TYPE_GFX_INDEX) { + if (rdev->cayman_ring_lock == NULL) { + cayman_cp_ring_create_workaround(rdev); + } + } else { + if (rdev->cayman_ring_lock != NULL && + !radeon_fence_count_emitted(rdev, CAYMAN_RING_TYPE_CP1_INDEX) && + !radeon_fence_count_emitted(rdev, CAYMAN_RING_TYPE_CP2_INDEX)) { + cayman_cp_ring_cleanup_workaround(rdev); + } + } + /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); radeon_ring_write(ring, 1); + if (rdev->cayman_ring_lock) + radeon_semaphore_emit_wait(rdev, ib->ring, rdev->cayman_ring_lock); radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -1140,6 +1202,8 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24)); + if (rdev->cayman_ring_lock) + radeon_semaphore_emit_signal(rdev, ib->ring, rdev->cayman_ring_lock); /* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); @@ -1190,6 +1254,25 @@ static int cayman_cp_load_microcode(struct radeon_device *rdev) return 0; } +static int cayman_cp_start_compute(struct radeon_device *rdev, int ridx) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + int r; + + r = radeon_ring_lock(rdev, ring, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + + /* clear the compute context state */ + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0) | 2); + radeon_ring_write(ring, 0); + + radeon_ring_unlock_commit(rdev, ring); + return 0; +} + static int cayman_cp_start(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -1251,7 +1334,17 @@ static int cayman_cp_start(struct radeon_device *rdev) radeon_ring_unlock_commit(rdev, ring); - /* XXX init other rings */ + r = cayman_cp_start_compute(rdev, RADEON_RING_TYPE_GFX_INDEX); + if (r) + return r; + + r = cayman_cp_start_compute(rdev, CAYMAN_RING_TYPE_CP1_INDEX); + if (r) + return r; + + r = cayman_cp_start_compute(rdev, CAYMAN_RING_TYPE_CP2_INDEX); + if (r) + return r; return 0; } @@ -1377,14 +1470,24 @@ int cayman_cp_resume(struct radeon_device *rdev) /* start the rings */ cayman_cp_start(rdev); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; - rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; - rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; - /* this only test cp0 */ r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); if (r) { rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + return r; + } + + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true; + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + if (r) { rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + return r; + } + + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true; + r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + if (r) { rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; return r; } @@ -1468,7 +1571,7 @@ int cayman_asic_reset(struct radeon_device *rdev) static int cayman_startup(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r; /* enable pcie gen2 link */ @@ -1556,11 +1659,27 @@ static int cayman_startup(struct radeon_device *rdev) } evergreen_irq_set(rdev); + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, + CP_RB1_RPTR, CP_RB1_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, + CP_RB2_RPTR, CP_RB2_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + r = cayman_cp_load_microcode(rdev); if (r) return r; @@ -1607,6 +1726,7 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { /* FIXME: we should wait for ring to be empty */ + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); radeon_ib_pool_suspend(rdev); radeon_vm_manager_suspend(rdev); r600_blit_suspend(rdev); @@ -1626,7 +1746,7 @@ int cayman_suspend(struct radeon_device *rdev) */ int cayman_init(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r; /* Read BIOS */ @@ -1675,9 +1795,18 @@ int cayman_init(struct radeon_device *rdev) if (r) return r; + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024); + ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 16 * 1024); + + ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 16 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -1734,6 +1863,7 @@ void cayman_fini(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) si_rlc_fini(rdev); radeon_wb_fini(rdev); + radeon_semaphore_free(rdev, &rdev->cayman_ring_lock, NULL); radeon_vm_manager_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 58a2fcf..1516b2d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1520,6 +1520,8 @@ struct radeon_device { /* virtual memory */ struct mutex vm_mutex; struct radeon_vm_manager vm_manager; + /* workaround for defect in caymans compute rings */ + struct radeon_semaphore *cayman_ring_lock; }; int radeon_device_init(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d1ead9c..54f3ec3 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -103,7 +103,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority p->ring = RADEON_RING_TYPE_GFX_INDEX; break; case RADEON_CS_RING_COMPUTE: - if (p->rdev->family >= CHIP_TAHITI) { + if (p->rdev->family >= CHIP_CAYMAN) { if (p->priority > 0) p->ring = CAYMAN_RING_TYPE_CP1_INDEX; else -- 1.7.9.5 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel