Move VM flushing into the rings as a per-ring function. This is the first step towards making VM operations async. Signed-off-by: Christian König <deathsimple@xxxxxxxxxxx> Reviewed-by: Jerome Glisse <jglisse@xxxxxxxxxx> --- drivers/gpu/drm/radeon/ni.c | 31 ++++++++++++++++--------------- drivers/gpu/drm/radeon/radeon.h | 6 ++++-- drivers/gpu/drm/radeon/radeon_asic.c | 12 +++++++++--- drivers/gpu/drm/radeon/radeon_asic.h | 3 +-- drivers/gpu/drm/radeon/radeon_cs.c | 1 + drivers/gpu/drm/radeon/radeon_gart.c | 4 +++- drivers/gpu/drm/radeon/radeon_ring.c | 8 ++++++++ drivers/gpu/drm/radeon/si.c | 15 --------------- 8 files changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 3b1aab3..ad337e8 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1494,24 +1494,9 @@ int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0); WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12); - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-7 are the VM contexts0-7 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << id); return 0; } -void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) -{ - if (vm->id == -1) - return; - - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-7 are the VM contexts0-7 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); -} - #define R600_PTE_VALID (1 << 0) #define R600_PTE_SYSTEM (1 << 1) #define R600_PTE_SNOOPED (1 << 2) @@ -1543,3 +1528,19 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, addr |= flags; writeq(addr, ptr + (pfn * 8)); } + +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (!ib->vm || ib->vm->id == -1) + return; + + /* flush hdp cache */ + 
radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0)); + radeon_ring_write(ring, 0x1); + + /* one bit per VM context: bits 0-7 on cayman/trinity, bits 0-15 on SI */ + radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); + radeon_ring_write(ring, 1 << ib->vm->id); +} diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index aeb2d1f..1228778 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -658,6 +658,8 @@ struct radeon_vm { struct mutex mutex; /* last fence for cs using this vm */ struct radeon_fence *fence; + /* last flush or NULL if we still need to flush */ + struct radeon_fence *last_flush; }; struct radeon_vm_manager { @@ -1135,7 +1137,6 @@ struct radeon_asic { int (*init)(struct radeon_device *rdev); void (*fini)(struct radeon_device *rdev); int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id); - void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm); uint32_t (*page_flags)(struct radeon_device *rdev, struct radeon_vm *vm, uint32_t flags); @@ -1154,6 +1155,7 @@ struct radeon_asic { int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp); bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp); + void (*vm_flush)(struct radeon_device *rdev, struct radeon_ib *ib); } ring[RADEON_NUM_RINGS]; /* irqs */ struct { @@ -1696,7 +1698,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) #define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id)) -#define radeon_asic_vm_tlb_flush(rdev, v) (rdev)->asic->vm.tlb_flush((rdev), (v)) #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags)) #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), 
(flags)) #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) @@ -1705,6 +1706,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib)) #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib)) #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp)) +#define radeon_ring_vm_flush(rdev, r, ib) (rdev)->asic->ring[(r)].vm_flush((rdev), (ib)) #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 9d5f4f3..b44ab96 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1360,7 +1360,6 @@ static struct radeon_asic cayman_asic = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, .bind = &cayman_vm_bind, - .tlb_flush = &cayman_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1374,6 +1373,7 @@ static struct radeon_asic cayman_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1384,6 +1384,7 @@ static struct radeon_asic cayman_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1394,6 +1395,7 @@ static struct radeon_asic cayman_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, } }, .irq = { @@ -1462,7 +1464,6 @@ static struct 
radeon_asic trinity_asic = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, .bind = &cayman_vm_bind, - .tlb_flush = &cayman_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1476,6 +1477,7 @@ static struct radeon_asic trinity_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1486,6 +1488,7 @@ static struct radeon_asic trinity_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1496,6 +1499,7 @@ static struct radeon_asic trinity_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, } }, .irq = { @@ -1564,7 +1568,6 @@ static struct radeon_asic si_asic = { .init = &si_vm_init, .fini = &si_vm_fini, .bind = &si_vm_bind, - .tlb_flush = &si_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1578,6 +1581,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1588,6 +1592,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1598,6 +1603,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, } }, .irq = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index f4af243..87466d3 100644 
--- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -440,7 +440,7 @@ int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); -void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib); uint32_t cayman_vm_page_flags(struct radeon_device *rdev, struct radeon_vm *vm, uint32_t flags); @@ -470,7 +470,6 @@ int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); -void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); #endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 85a80e4..d4a804b 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -484,6 +484,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); + radeon_cs_sync_to(parser, vm->last_flush); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 18a03ab..a6a3dca 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -520,6 +520,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, break; } radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush); /* hw unbind */ rdev->vm_manager.use_bitmap &= ~(1 << vm->id); @@ -639,6 +640,7 @@ retry_id: /* do hw bind */ r = radeon_asic_vm_bind(rdev, vm, id); + radeon_fence_unref(&vm->last_flush); if 
(r) { radeon_sa_bo_free(rdev, &vm->sa_bo, NULL); return r; @@ -836,7 +838,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, } radeon_asic_vm_set_page(rdev, bo_va->vm, i + pfn, addr, flags); } - radeon_asic_vm_tlb_flush(rdev, bo_va->vm); + radeon_fence_unref(&vm->last_flush); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 9ab3a3c..ceb334b 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -160,6 +160,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (!need_sync) { radeon_semaphore_free(rdev, &ib->semaphore, NULL); } + /* if we can't remember our last VM flush then flush now! */ + if (ib->vm && !ib->vm->last_flush) { + radeon_ring_vm_flush(rdev, ib->ring, ib); + } if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); @@ -174,6 +178,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } + /* we just flushed the VM, remember that */ + if (ib->vm && !ib->vm->last_flush) { + ib->vm->last_flush = radeon_fence_ref(ib->fence); + } radeon_ring_unlock_commit(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 40a9a85..7ef16d6 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2788,24 +2788,9 @@ int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) else WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2), vm->pt_gpu_addr >> 12); - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-15 are the VM contexts0-15 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << id); return 0; } -void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) -{ - if (vm->id == -1) - return; - - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* 
bits 0-15 are the VM contexts0-15 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); -} - /* * RLC */ -- 1.7.9.5 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel