On Fri, Feb 1, 2013 at 11:32 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote:
> From: Alex Deucher <alexander.deucher@xxxxxxx>
>
> For very large page table updates, we can exceed the
> size of the ring. To avoid this, use an IB to perform
> the page table update.
>
> v2 (ck): clean up the IB infrastructure and then use it instead
> of filling the struct ourselves.
>
> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>

Looks good!  Thanks for fixing that up.  I'll apply it to my WIP 3.9
tree and test on cayman.

Alex

> ---
>  drivers/gpu/drm/radeon/ni.c          | 30 +++++++++--------
>  drivers/gpu/drm/radeon/radeon.h      |  7 ++--
>  drivers/gpu/drm/radeon/radeon_asic.h |  8 +++--
>  drivers/gpu/drm/radeon/radeon_cs.c   | 19 +++--------
>  drivers/gpu/drm/radeon/radeon_gart.c | 60 ++++++++++++++--------------------
>  drivers/gpu/drm/radeon/radeon_ring.c | 19 +++++++++++
>  drivers/gpu/drm/radeon/si.c          | 52 +++++++++++++++--------------
>  7 files changed, 103 insertions(+), 92 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
> index 170bd03..7cead76 100644
> --- a/drivers/gpu/drm/radeon/ni.c
> +++ b/drivers/gpu/drm/radeon/ni.c
> @@ -1946,19 +1946,21 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
>   * cayman_vm_set_page - update the page tables using the CP
>   *
>   * @rdev: radeon_device pointer
> + * @ib: indirect buffer to fill with commands
>   * @pe: addr of the page entry
>   * @addr: dst addr to write into pe
>   * @count: number of page entries to update
>   * @incr: increase next addr by incr bytes
>   * @flags: access flags
>   *
> - * Update the page tables using the CP (cayman-si).
> + * Update the page tables using the CP (cayman/TN).
>   */
> -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void cayman_vm_set_page(struct radeon_device *rdev,
> +                        struct radeon_ib *ib,
> +                        uint64_t pe,
>                          uint64_t addr, unsigned count,
>                          uint32_t incr, uint32_t flags)
>  {
> -    struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
>      uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
>      uint64_t value;
>      unsigned ndw;
> @@ -1969,9 +1971,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>              if (ndw > 0x3FFF)
>                  ndw = 0x3FFF;
>
> -            radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
> -            radeon_ring_write(ring, pe);
> -            radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> +            ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
> +            ib->ptr[ib->length_dw++] = pe;
> +            ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
>              for (; ndw > 1; ndw -= 2, --count, pe += 8) {
>                  if (flags & RADEON_VM_PAGE_SYSTEM) {
>                      value = radeon_vm_map_gart(rdev, addr);
> @@ -1983,8 +1985,8 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                  }
>                  addr += incr;
>                  value |= r600_flags;
> -                radeon_ring_write(ring, value);
> -                radeon_ring_write(ring, upper_32_bits(value));
> +                ib->ptr[ib->length_dw++] = value;
> +                ib->ptr[ib->length_dw++] = upper_32_bits(value);
>              }
>          }
>      } else {
> @@ -1994,9 +1996,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                  ndw = 0xFFFFE;
>
>              /* for non-physically contiguous pages (system) */
> -            radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
> -            radeon_ring_write(ring, pe);
> -            radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> +            ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
> +            ib->ptr[ib->length_dw++] = pe;
> +            ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
>              for (; ndw > 0; ndw -= 2, --count, pe += 8) {
>                  if (flags & RADEON_VM_PAGE_SYSTEM) {
>                      value = radeon_vm_map_gart(rdev, addr);
> @@ -2008,10 +2010,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                  }
>                  addr += incr;
>                  value |= r600_flags;
> -                radeon_ring_write(ring, value);
> -                radeon_ring_write(ring, upper_32_bits(value));
> +                ib->ptr[ib->length_dw++] = value;
> +                ib->ptr[ib->length_dw++] = upper_32_bits(value);
>              }
>          }
> +        while (ib->length_dw & 0x7)
> +            ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
>      }
>  }
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 6539d6c..307d681 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -780,6 +780,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
>                    struct radeon_ib *ib, struct radeon_vm *vm,
>                    unsigned size);
>  void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
> +void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence);
>  int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
>                         struct radeon_ib *const_ib);
>  int radeon_ib_pool_init(struct radeon_device *rdev);
> @@ -1188,7 +1189,9 @@ struct radeon_asic {
>          void (*fini)(struct radeon_device *rdev);
>
>          u32 pt_ring_index;
> -        void (*set_page)(struct radeon_device *rdev, uint64_t pe,
> +        void (*set_page)(struct radeon_device *rdev,
> +                         struct radeon_ib *ib,
> +                         uint64_t pe,
>                           uint64_t addr, unsigned count,
>                           uint32_t incr, uint32_t flags);
>      } vm;
> @@ -1810,7 +1813,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
>  #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
>  #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
>  #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
> -#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (pe), (addr), (count), (incr), (flags)))
> +#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
>  #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
>  #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp))
>  #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
> index e429e25..f4134a8 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
> @@ -474,7 +474,9 @@ int cayman_vm_init(struct radeon_device *rdev);
>  void cayman_vm_fini(struct radeon_device *rdev);
>  void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
>  uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
> -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void cayman_vm_set_page(struct radeon_device *rdev,
> +                        struct radeon_ib *ib,
> +                        uint64_t pe,
>                          uint64_t addr, unsigned count,
>                          uint32_t incr, uint32_t flags);
>  int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
> @@ -506,7 +508,9 @@ int si_irq_set(struct radeon_device *rdev);
>  int si_irq_process(struct radeon_device *rdev);
>  int si_vm_init(struct radeon_device *rdev);
>  void si_vm_fini(struct radeon_device *rdev);
> -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void si_vm_set_page(struct radeon_device *rdev,
> +                    struct radeon_ib *ib,
> +                    uint64_t pe,
>                      uint64_t addr, unsigned count,
>                      uint32_t incr, uint32_t flags);
>  void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 1d214b6..70d3824 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -125,18 +125,6 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
>      return 0;
>  }
>
> -static void radeon_cs_sync_to(struct radeon_cs_parser *p,
> -                              struct radeon_fence *fence)
> -{
> -    struct radeon_fence *other;
> -
> -    if (!fence)
> -        return;
> -
> -    other = p->ib.sync_to[fence->ring];
> -    p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other);
> -}
> -
>  static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
>  {
>      int i;
> @@ -145,7 +133,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
>          if (!p->relocs[i].robj)
>              continue;
>
> -        radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj);
> +        radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
>      }
>  }
>
> @@ -472,8 +460,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
>          goto out;
>      }
>      radeon_cs_sync_rings(parser);
> -    radeon_cs_sync_to(parser, vm->fence);
> -    radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
> +    radeon_ib_sync_to(&parser->ib, vm->fence);
> +    radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
> +                      rdev, vm, parser->ring));
>
>      if ((rdev->family >= CHIP_TAHITI) &&
>          (parser->chunk_const_ib_idx != -1)) {
> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
> index 6e24f84..2c1341f 100644
> --- a/drivers/gpu/drm/radeon/radeon_gart.c
> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> @@ -929,6 +929,7 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
>   */
>  static int radeon_vm_update_pdes(struct radeon_device *rdev,
>                                   struct radeon_vm *vm,
> +                                 struct radeon_ib *ib,
>                                   uint64_t start, uint64_t end)
>  {
>      static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
> @@ -971,7 +972,7 @@ retry:
>              ((last_pt + incr * count) != pt)) {
>
>              if (count) {
> -                radeon_asic_vm_set_page(rdev, last_pde,
> +                radeon_asic_vm_set_page(rdev, ib, last_pde,
>                                          last_pt, count, incr,
>                                          RADEON_VM_PAGE_VALID);
>              }
> @@ -985,7 +986,7 @@ retry:
>      }
>
>      if (count) {
> -        radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
> +        radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
>                                  incr, RADEON_VM_PAGE_VALID);
>
>      }
> @@ -1009,6 +1010,7 @@ retry:
>   */
>  static void radeon_vm_update_ptes(struct radeon_device *rdev,
>                                    struct radeon_vm *vm,
> +                                  struct radeon_ib *ib,
>                                    uint64_t start, uint64_t end,
>                                    uint64_t dst, uint32_t flags)
>  {
> @@ -1038,7 +1040,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
>          if ((last_pte + 8 * count) != pte) {
>
>              if (count) {
> -                radeon_asic_vm_set_page(rdev, last_pte,
> +                radeon_asic_vm_set_page(rdev, ib, last_pte,
>                                          last_dst, count,
>                                          RADEON_GPU_PAGE_SIZE,
>                                          flags);
> @@ -1056,7 +1058,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
>      }
>
>      if (count) {
> -        radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
> +        radeon_asic_vm_set_page(rdev, ib, last_pte,
> +                                last_dst, count,
>                                  RADEON_GPU_PAGE_SIZE, flags);
>      }
>  }
> @@ -1080,8 +1083,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>                              struct ttm_mem_reg *mem)
>  {
>      unsigned ridx = rdev->asic->vm.pt_ring_index;
> -    struct radeon_ring *ring = &rdev->ring[ridx];
> -    struct radeon_semaphore *sem = NULL;
> +    struct radeon_ib ib;
>      struct radeon_bo_va *bo_va;
>      unsigned nptes, npdes, ndw;
>      uint64_t addr;
> @@ -1124,25 +1126,13 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>          bo_va->valid = false;
>      }
>
> -    if (vm->fence && radeon_fence_signaled(vm->fence)) {
> -        radeon_fence_unref(&vm->fence);
> -    }
> -
> -    if (vm->fence && vm->fence->ring != ridx) {
> -        r = radeon_semaphore_create(rdev, &sem);
> -        if (r) {
> -            return r;
> -        }
> -    }
> -
>      nptes = radeon_bo_ngpu_pages(bo);
>
>      /* assume two extra pdes in case the mapping overlaps the borders */
>      npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
>
> -    /* estimate number of dw needed */
> -    /* semaphore, fence and padding */
> -    ndw = 32;
> +    /* padding, etc. */
> +    ndw = 64;
>
>      if (RADEON_VM_BLOCK_SIZE > 11)
>          /* reserve space for one header for every 2k dwords */
> @@ -1161,33 +1151,31 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>      /* reserve space for pde addresses */
>      ndw += npdes * 2;
>
> -    r = radeon_ring_lock(rdev, ring, ndw);
> -    if (r) {
> -        return r;
> -    }
> +    /* update too big for an IB */
> +    if (ndw > 0xfffff)
> +        return -ENOMEM;
>
> -    if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
> -        radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
> -        radeon_fence_note_sync(vm->fence, ridx);
> -    }
> +    r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4);
> +    ib.length_dw = 0;
>
> -    r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
> +    r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
>      if (r) {
> -        radeon_ring_unlock_undo(rdev, ring);
> +        radeon_ib_free(rdev, &ib);
>          return r;
>      }
>
> -    radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
> +    radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
>                            addr, bo_va->flags);
>
> -    radeon_fence_unref(&vm->fence);
> -    r = radeon_fence_emit(rdev, &vm->fence, ridx);
> +    radeon_ib_sync_to(&ib, vm->fence);
> +    r = radeon_ib_schedule(rdev, &ib, NULL);
>      if (r) {
> -        radeon_ring_unlock_undo(rdev, ring);
> +        radeon_ib_free(rdev, &ib);
>          return r;
>      }
> -    radeon_ring_unlock_commit(rdev, ring);
> -    radeon_semaphore_free(rdev, &sem, vm->fence);
> +    radeon_fence_unref(&vm->fence);
> +    vm->fence = radeon_fence_ref(ib.fence);
> +    radeon_ib_free(rdev, &ib);
>      radeon_fence_unref(&vm->last_flush);
>
>      return 0;
> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
> index cd72062..8d58e26 100644
> --- a/drivers/gpu/drm/radeon/radeon_ring.c
> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
> @@ -109,6 +109,25 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
>  }
>
>  /**
> + * radeon_ib_sync_to - sync to fence before executing the IB
> + *
> + * @ib: IB object to add fence to
> + * @fence: fence to sync to
> + *
> + * Sync to the fence before executing the IB
> + */
> +void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence)
> +{
> +    struct radeon_fence *other;
> +
> +    if (!fence)
> +        return;
> +
> +    other = ib->sync_to[fence->ring];
> +    ib->sync_to[fence->ring] = radeon_fence_later(fence, other);
> +}
> +
> +/**
>   * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
>   *
>   * @rdev: radeon_device pointer
> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
> index cd83bc5..a910cb9 100644
> --- a/drivers/gpu/drm/radeon/si.c
> +++ b/drivers/gpu/drm/radeon/si.c
> @@ -3043,19 +3043,21 @@ void si_vm_fini(struct radeon_device *rdev)
>   * si_vm_set_page - update the page tables using the CP
>   *
>   * @rdev: radeon_device pointer
> + * @ib: indirect buffer to fill with commands
>   * @pe: addr of the page entry
>   * @addr: dst addr to write into pe
>   * @count: number of page entries to update
>   * @incr: increase next addr by incr bytes
>   * @flags: access flags
>   *
> - * Update the page tables using the CP (cayman-si).
> + * Update the page tables using the CP (SI).
>   */
> -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
> +void si_vm_set_page(struct radeon_device *rdev,
> +                    struct radeon_ib *ib,
> +                    uint64_t pe,
>                      uint64_t addr, unsigned count,
>                      uint32_t incr, uint32_t flags)
>  {
> -    struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
>      uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
>      uint64_t value;
>      unsigned ndw;
> @@ -3066,11 +3068,11 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>              if (ndw > 0x3FFE)
>                  ndw = 0x3FFE;
>
> -            radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
> -            radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> -                                     WRITE_DATA_DST_SEL(1)));
> -            radeon_ring_write(ring, pe);
> -            radeon_ring_write(ring, upper_32_bits(pe));
> +            ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
> +            ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
> +                                        WRITE_DATA_DST_SEL(1));
> +            ib->ptr[ib->length_dw++] = pe;
> +            ib->ptr[ib->length_dw++] = upper_32_bits(pe);
>              for (; ndw > 2; ndw -= 2, --count, pe += 8) {
>                  if (flags & RADEON_VM_PAGE_SYSTEM) {
>                      value = radeon_vm_map_gart(rdev, addr);
> @@ -3082,8 +3084,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                  }
>                  addr += incr;
>                  value |= r600_flags;
> -                radeon_ring_write(ring, value);
> -                radeon_ring_write(ring, upper_32_bits(value));
> +                ib->ptr[ib->length_dw++] = value;
> +                ib->ptr[ib->length_dw++] = upper_32_bits(value);
>              }
>          }
>      } else {
> @@ -3095,9 +3097,9 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                      ndw = 0xFFFFE;
>
>                  /* for non-physically contiguous pages (system) */
> -                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
> -                radeon_ring_write(ring, pe);
> -                radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> +                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
> +                ib->ptr[ib->length_dw++] = pe;
> +                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
>                  for (; ndw > 0; ndw -= 2, --count, pe += 8) {
>                      if (flags & RADEON_VM_PAGE_SYSTEM) {
>                          value = radeon_vm_map_gart(rdev, addr);
> @@ -3109,8 +3111,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                      }
>                      addr += incr;
>                      value |= r600_flags;
> -                    radeon_ring_write(ring, value);
> -                    radeon_ring_write(ring, upper_32_bits(value));
> +                    ib->ptr[ib->length_dw++] = value;
> +                    ib->ptr[ib->length_dw++] = upper_32_bits(value);
>                  }
>              }
>          } else {
> @@ -3124,20 +3126,22 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
>                  else
>                      value = 0;
>                  /* for physically contiguous pages (vram) */
> -                radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
> -                radeon_ring_write(ring, pe); /* dst addr */
> -                radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
> -                radeon_ring_write(ring, r600_flags); /* mask */
> -                radeon_ring_write(ring, 0);
> -                radeon_ring_write(ring, value); /* value */
> -                radeon_ring_write(ring, upper_32_bits(value));
> -                radeon_ring_write(ring, incr); /* increment size */
> -                radeon_ring_write(ring, 0);
> +                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
> +                ib->ptr[ib->length_dw++] = pe; /* dst addr */
> +                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
> +                ib->ptr[ib->length_dw++] = r600_flags; /* mask */
> +                ib->ptr[ib->length_dw++] = 0;
> +                ib->ptr[ib->length_dw++] = value; /* value */
> +                ib->ptr[ib->length_dw++] = upper_32_bits(value);
> +                ib->ptr[ib->length_dw++] = incr; /* increment size */
> +                ib->ptr[ib->length_dw++] = 0;
>                  pe += ndw * 4;
>                  addr += (ndw / 2) * incr;
>                  count -= ndw / 2;
>              }
>          }
> +        while (ib->length_dw & 0x7)
> +            ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
>      }
>  }
>
> --
> 1.7.9.5
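
The heart of the change is the new flow in radeon_vm_bo_update_pte(): instead of locking the ring, taking a semaphore for the cross-ring case and emitting a fence by hand, the update commands are staged in an IB, and radeon_ib_schedule() performs the inter-ring synchronization from the fences collected with radeon_ib_sync_to(). Below is a condensed sketch of that flow built only from the helpers touched by the patch; the function name is hypothetical, locking, the bo_va lookup and the dword estimate are omitted, and unlike the patch as posted it also checks the return value of radeon_ib_get().

/* Hypothetical condensed form of the new update path in radeon_gart.c. */
static int vm_update_via_ib(struct radeon_device *rdev, struct radeon_vm *vm,
                            struct radeon_bo_va *bo_va, uint64_t addr,
                            unsigned ndw, unsigned ridx)
{
    struct radeon_ib ib;
    int r;

    /* updates too big for a single IB still have to fail */
    if (ndw > 0xfffff)
        return -ENOMEM;

    r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4);
    if (r) /* the posted patch does not check this return value */
        return r;
    ib.length_dw = 0;

    /* both helpers now emit into ib.ptr[] through the vm.set_page hook */
    r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
    if (r)
        goto out;
    radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
                          addr, bo_va->flags);

    /* wait for the previous VM fence instead of the old semaphore dance */
    radeon_ib_sync_to(&ib, vm->fence);
    r = radeon_ib_schedule(rdev, &ib, NULL);
    if (r)
        goto out;

    /* the IB's own fence now protects the page table update */
    radeon_fence_unref(&vm->fence);
    vm->fence = radeon_fence_ref(ib.fence);
    radeon_fence_unref(&vm->last_flush);
out:
    radeon_ib_free(rdev, &ib);
    return r;
}

Since an IB holds up to 0xfffff dwords, far more than the ring space the old path could reserve, even a worst-case page table update now fits in a single submission, which is exactly the overflow the commit message describes.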