From: Christian König <christian.koenig@xxxxxxx> To workaround bugs and/or certain limits it's sometimes useful to fall back to waiting on fences. Signed-off-by: Christian König <christian.koenig@xxxxxxx> --- drivers/gpu/drm/radeon/cik.c | 13 ++- drivers/gpu/drm/radeon/cik_sdma.c | 13 ++- drivers/gpu/drm/radeon/evergreen_dma.c | 9 +-- drivers/gpu/drm/radeon/r100.c | 3 +- drivers/gpu/drm/radeon/r600.c | 13 ++- drivers/gpu/drm/radeon/r600_dma.c | 13 ++- drivers/gpu/drm/radeon/radeon.h | 14 ++-- drivers/gpu/drm/radeon/radeon_asic.h | 18 ++--- drivers/gpu/drm/radeon/radeon_cs.c | 9 ++- drivers/gpu/drm/radeon/radeon_fence.c | 30 +++++++ drivers/gpu/drm/radeon/radeon_gart.c | 4 +- drivers/gpu/drm/radeon/radeon_ring.c | 46 +++-------- drivers/gpu/drm/radeon/radeon_semaphore.c | 127 ++++++++++++++++++++++-------- drivers/gpu/drm/radeon/rv770_dma.c | 9 +-- drivers/gpu/drm/radeon/si_dma.c | 9 +-- drivers/gpu/drm/radeon/uvd_v1_0.c | 4 +- drivers/gpu/drm/radeon/uvd_v3_1.c | 4 +- 17 files changed, 191 insertions(+), 147 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ae92aa0..811fc1b 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3556,7 +3556,7 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); } -void cik_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -3567,6 +3567,8 @@ void cik_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); radeon_ring_write(ring, addr & 0xffffffff); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); + + return true; } /** @@ -3609,13 +3611,8 @@ int cik_copy_cpdma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 9c9529d..0300727 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -130,7 +130,7 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev, * Add a DMA semaphore packet to the ring wait on or signal * other rings (CIK). */ -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -141,6 +141,8 @@ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); radeon_ring_write(ring, addr & 0xfffffff8); radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + + return true; } /** @@ -443,13 +445,8 @@ int cik_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 6a0656d..a37b544 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -131,13 +131,8 @@ int evergreen_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 784983d..10abc4d 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -869,13 +869,14 @@ void r100_fence_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, RADEON_SW_INT_FIRE); } -void r100_semaphore_ring_emit(struct radeon_device *rdev, +bool r100_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) { /* Unused on older asics, since we don't have semaphores or multiple rings */ BUG(); + return false; } int r100_copy_blit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 4e609e8..9ad0673 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2650,7 +2650,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } } -void r600_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -2664,6 +2664,8 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); radeon_ring_write(ring, addr & 0xffffffff); radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); + + return true; } /** @@ -2706,13 +2708,8 @@ int r600_copy_cpdma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 3b31745..7844d15 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -311,7 +311,7 @@ void r600_dma_fence_ring_emit(struct radeon_device *rdev, * Add a DMA semaphore packet to the ring wait on or signal * other rings (r6xx-SI). */ -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -322,6 +322,8 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(addr) & 0xff); + + return true; } /** @@ -462,13 +464,8 @@ int r600_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b9ee992..2fe2f63 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -348,6 +348,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); +int radeon_fence_wait_locked(struct radeon_fence *fence); int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, @@ -548,17 +549,20 @@ struct radeon_semaphore { struct radeon_sa_bo *sa_bo; signed waiters; uint64_t gpu_addr; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; }; int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore); -void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); -void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); +void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, + struct radeon_fence *fence); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - int signaler, int waiter); + int waiting_ring); void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence); @@ -765,7 +769,6 @@ struct radeon_ib { struct radeon_fence *fence; struct radeon_vm *vm; bool is_const_ib; - struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; }; @@ -921,7 +924,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, struct radeon_vm *vm, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); -void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib); int radeon_ib_pool_init(struct radeon_device *rdev); @@ -1638,7 +1640,7 @@ struct radeon_asic_ring { /* command emmit functions */ void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); - void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, + bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index f2833ee..c9fd97b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -80,7 +80,7 @@ int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); void r100_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r100_semaphore_ring_emit(struct radeon_device *rdev, +bool r100_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); @@ -313,13 +313,13 @@ int r600_cs_parse(struct radeon_cs_parser *p); int r600_dma_cs_parse(struct radeon_cs_parser *p); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r600_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void r600_dma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -566,10 +566,6 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev, */ void cayman_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cayman_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait); void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); int cayman_init(struct radeon_device *rdev); void cayman_fini(struct radeon_device *rdev); @@ -697,7 +693,7 @@ void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -717,7 +713,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); void cik_fence_compute_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cik_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); @@ -807,7 +803,7 @@ void uvd_v1_0_stop(struct radeon_device *rdev); int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); -void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, +bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -819,7 +815,7 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); /* uvd v3.1 */ -void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, +bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 26ca223..f41594b 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -159,7 +159,8 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) if (!p->relocs[i].robj) continue; - radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj); + radeon_semaphore_sync_to(p->ib.semaphore, + p->relocs[i].robj->tbo.sync_obj); } } @@ -411,9 +412,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); - radeon_ib_sync_to(&parser->ib, vm->fence); - radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id( - rdev, vm, parser->ring)); + radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); + radeon_semaphore_sync_to(parser->ib.semaphore, + radeon_vm_grab_id(rdev, vm, parser->ring)); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 281d14c..d3a86e4 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -472,6 +472,36 @@ int radeon_fence_wait_any(struct radeon_device *rdev, } /** + * radeon_fence_wait_locked - wait for a fence to signal + * + * @fence: radeon fence object + * + * Wait for the requested fence to signal (all asics). + * Returns 0 if the fence has passed, error for all other cases. + */ +int radeon_fence_wait_locked(struct radeon_fence *fence) +{ + uint64_t seq[RADEON_NUM_RINGS] = {}; + int r; + + if (fence == NULL) { + WARN(1, "Querying an invalid fence : %p !\n", fence); + return -EINVAL; + } + + seq[fence->ring] = fence->seq; + if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) + return 0; + + r = radeon_fence_wait_seq(fence->rdev, seq, false, false); + if (r) + return r; + + fence->seq = RADEON_FENCE_SIGNALED_SEQ; + return 0; +} + +/** * radeon_fence_wait_next_locked - wait for the next fence to signal * * @rdev: radeon device pointer diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 8a83b89..cd7489b 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -651,7 +651,7 @@ retry: radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, 0, pd_entries, 0, 0); - radeon_ib_sync_to(&ib, vm->fence); + radeon_semaphore_sync_to(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); @@ -1220,7 +1220,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, addr, radeon_vm_page_flags(bo_va->flags)); - radeon_ib_sync_to(&ib, vm->fence); + radeon_semaphore_sync_to(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 18254e1..9214403 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, struct radeon_vm *vm, unsigned size) { - int i, r; + int r; r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) { @@ -87,8 +87,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); } ib->is_const_ib = false; - for (i = 0; i < RADEON_NUM_RINGS; ++i) - ib->sync_to[i] = NULL; return 0; } @@ -109,25 +107,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) } /** - * radeon_ib_sync_to - sync to fence before executing the IB - * - * @ib: IB object to add fence to - * @fence: fence to sync to - * - * Sync to the fence before executing the IB - */ -void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence) -{ - struct radeon_fence *other; - - if (!fence) - return; - - other = ib->sync_to[fence->ring]; - ib->sync_to[fence->ring] = radeon_fence_later(fence, other); -} - -/** * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @rdev: radeon_device pointer @@ -151,8 +130,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - bool need_sync = false; - int i, r = 0; + int r = 0; if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ @@ -166,19 +144,15 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - struct radeon_fence *fence = ib->sync_to[i]; - if (radeon_fence_need_sync(fence, ib->ring)) { - need_sync = true; - radeon_semaphore_sync_rings(rdev, ib->semaphore, - fence->ring, ib->ring); - radeon_fence_note_sync(fence, ib->ring); - } - } - /* immediately free semaphore when we don't need to sync */ - if (!need_sync) { - radeon_semaphore_free(rdev, &ib->semaphore, NULL); + + /* sync with other rings */ + r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to sync rings (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; } + /* if we can't remember our last VM flush then flush now! */ /* XXX figure out why we have to flush for every IB */ if (ib->vm /*&& !ib->vm->last_flush*/) { diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 97d73bf..2b42aa1 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -34,7 +34,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) { - int r; + int i, r; *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { @@ -50,58 +50,121 @@ int radeon_semaphore_create(struct radeon_device *rdev, (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + (*semaphore)->sync_to[i] = NULL; + return 0; } -void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx, struct radeon_semaphore *semaphore) { - trace_radeon_semaphore_signale(ring, semaphore); + struct radeon_ring *ring = &rdev->ring[ridx]; + + trace_radeon_semaphore_signale(ridx, semaphore); + + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, false)) { + --semaphore->waiters; - --semaphore->waiters; - radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false); + /* for debugging lockup only, used by sysfs debug files */ + ring->last_semaphore_signal_addr = semaphore->gpu_addr; + return true; + } + return false; } -void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, struct radeon_semaphore *semaphore) { - trace_radeon_semaphore_wait(ring, semaphore); + struct radeon_ring *ring = &rdev->ring[ridx]; + + trace_radeon_semaphore_wait(ridx, semaphore); + + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, true)) { + ++semaphore->waiters; - ++semaphore->waiters; - radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); + /* for debugging lockup only, used by sysfs debug files */ + ring->last_semaphore_wait_addr = semaphore->gpu_addr; + return true; + } + return false; } -/* caller must hold ring lock */ +/** + * radeon_semaphore_sync_to - use the semaphore to sync to a fence + * + * @semaphore: semaphore object to add fence to + * @fence: fence to sync to + * + * Sync to the fence using this semaphore object + */ +void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, + struct radeon_fence *fence) +{ + struct radeon_fence *other; + + if (!fence) + return; + + other = semaphore->sync_to[fence->ring]; + semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); +} + +/** + * radeon_semaphore_sync_rings - sync ring to all registered fences + * + * @rdev: radeon_device pointer + * @semaphore: semaphore object to use for sync + * @ring: ring that needs sync + * + * Ensure that all registered fences are signaled before letting + * the ring continue. The caller must hold the ring lock. + */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - int signaler, int waiter) + int ring) { - int r; + int i, r; - /* no need to signal and wait on the same ring */ - if (signaler == waiter) { - return 0; - } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = semaphore->sync_to[i]; - /* prevent GPU deadlocks */ - if (!rdev->ring[signaler].ready) { - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); - return -EINVAL; - } + /* check if we really need to sync */ + if (!radeon_fence_need_sync(fence, ring)) + continue; - r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); - if (r) { - return r; - } - radeon_semaphore_emit_signal(rdev, signaler, semaphore); - radeon_ring_commit(rdev, &rdev->ring[signaler]); + /* prevent GPU deadlocks */ + if (!rdev->ring[i].ready) { + dev_err(rdev->dev, "Syncing to a disabled ring!"); + return -EINVAL; + } - /* we assume caller has already allocated space on waiters ring */ - radeon_semaphore_emit_wait(rdev, waiter, semaphore); + /* allocate enough space for sync command */ + r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); + if (r) { + return r; + } - /* for debugging lockup only, used by sysfs debug files */ - rdev->ring[signaler].last_semaphore_signal_addr = semaphore->gpu_addr; - rdev->ring[waiter].last_semaphore_wait_addr = semaphore->gpu_addr; + /* emit the signal semaphore */ + if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { + /* signaling wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + radeon_fence_wait_locked(fence); + continue; + } + + /* we assume caller has already allocated space on waiters ring */ + if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { + /* waiting wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + radeon_fence_wait_locked(fence); + continue; + } + + radeon_ring_commit(rdev, &rdev->ring[i]); + radeon_fence_note_sync(fence, ring); + } return 0; } diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index f9b02e3..aca8cbe 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -66,13 +66,8 @@ int rv770_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 8e8f461..59be2cf 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -195,13 +195,8 @@ int si_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index 7266805..d4a68af 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -357,7 +357,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) * * Emit a semaphore command (either wait or signal) to the UVD ring. */ -void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, +bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -372,6 +372,8 @@ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); radeon_ring_write(ring, emit_wait ? 1 : 0); + + return true; } /** diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c index 5b6fa1f..d722db2 100644 --- a/drivers/gpu/drm/radeon/uvd_v3_1.c +++ b/drivers/gpu/drm/radeon/uvd_v3_1.c @@ -37,7 +37,7 @@ * * Emit a semaphore command (either wait or signal) to the UVD ring. */ -void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, +bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -52,4 +52,6 @@ void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); + + return true; } -- 1.8.1.2 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel