I looked really hard and couldn't find anything obviously broken. It makes me a bit nervous that there is no bounds checking on the nodes array, though.
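For what it's worth, the kind of check I have in mind would look roughly like the sketch below. This is only an illustration, not part of the patch: the helper name and the num_pages parameter are made up for the example (num_pages could come from bo_va->bo->tbo.mem.num_pages).

/* Sketch only: bound the walk over the drm_mm_node array so a bad offset
 * cannot step past the last node.  num_pages is assumed to be the total
 * number of system pages backing the BO.
 */
static struct drm_mm_node *find_start_node(struct drm_mm_node *nodes,
					   unsigned long num_pages,
					   uint64_t *offset)
{
	uint64_t walked = 0;

	while (*offset > (nodes->size << PAGE_SHIFT)) {
		*offset -= nodes->size << PAGE_SHIFT;
		walked += nodes->size;
		if (WARN_ON_ONCE(walked >= num_pages))
			return NULL;	/* offset lies beyond the end of the BO */
		++nodes;
	}
	return nodes;
}

amdgpu_vm_bo_split_mapping could then bail out with an error instead of silently walking past the end of the array.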
Just one minor nitpick inline below. With that fixed,

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

On 16-08-29 05:20 AM, Christian König wrote:
> From: Christian König <christian.koenig at amd.com>
>
> This allows us to map scattered VRAM BOs to the VMs.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 +++++++++++++++++++---------------
>  1 file changed, 44 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ea1bd67..df6506d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1048,8 +1048,8 @@ error_free:
>   * @pages_addr: DMA addresses to use for mapping
>   * @vm: requested vm
>   * @mapping: mapped range and flags to use for the update
> - * @addr: addr to set the area to
>   * @flags: HW flags for the mapping
> + * @nodes: array of drm_mm_nodes with the MC addresses
>   * @fence: optional resulting fence
>   *
>   * Split the mapping into smaller chunks so that each update fits
> @@ -1062,12 +1062,11 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>                                        dma_addr_t *pages_addr,
>                                        struct amdgpu_vm *vm,
>                                        struct amdgpu_bo_va_mapping *mapping,
> -                                      uint32_t flags, uint64_t addr,
> +                                      uint32_t flags,
> +                                      struct drm_mm_node *nodes,
>                                        struct fence **fence)
>  {
> -       const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
> -
> -       uint64_t src = 0, start = mapping->it.start;
> +       uint64_t offset, src = 0, start = mapping->it.start;
>         int r;
>
>         /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
> @@ -1080,23 +1079,38 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>
>         trace_amdgpu_vm_bo_update(mapping);
>
> -       if (pages_addr) {
> -               if (flags == gtt_flags)
> -                       src = adev->gart.table_addr + (addr >> 12) * 8;
> -               addr = 0;
> +       offset = mapping->offset;
> +       if (nodes) {
> +               while (offset > (nodes->size << PAGE_SHIFT)) {
> +                       offset -= (nodes->size << PAGE_SHIFT);
> +                       ++nodes;
> +               }
>         }
> -       addr += mapping->offset;
>
> -       if (!pages_addr || src)
> -               return amdgpu_vm_bo_update_mapping(adev, exclusive,
> -                                                   src, pages_addr, vm,
> -                                                   start, mapping->it.last,
> -                                                   flags, addr, fence);
> +       do {
> +               uint64_t max_entries;
> +               uint64_t addr, last;
>
> -       while (start != mapping->it.last + 1) {
> -               uint64_t last;
> +               if (nodes) {
> +                       addr = nodes->start << PAGE_SHIFT;
> +                       max_entries = nodes->size - (offset >> PAGE_SHIFT);

[FK] I think max_entries is the number of GPU page table entries. So you
should convert from the system page size to the GPU page size:
max_entries = ((nodes->size << PAGE_SHIFT) - offset) >> AMDGPU_GPU_PAGE_SHIFT;

> +               } else {
> +                       addr = 0;
> +                       max_entries = S64_MAX;
> +               }
> +               addr += offset;
> +
> +               if (pages_addr) {
> +                       if (flags == gtt_flags)
> +                               src = adev->gart.table_addr + (addr >> 12) * 8;
> +                       else
> +                               max_entries = min(max_entries, 16ull * 1024ull);
> +                       addr = 0;
> +               } else if (flags & AMDGPU_PTE_VALID) {
> +                       addr += adev->vm_manager.vram_base_offset;
> +               }
>
> -               last = min((uint64_t)mapping->it.last, start + max_size - 1);
> +               last = min((uint64_t)mapping->it.last, start + max_entries - 1);
>                 r = amdgpu_vm_bo_update_mapping(adev, exclusive,
>                                                 src, pages_addr, vm,
>                                                 start, last, flags, addr,
> @@ -1104,9 +1118,14 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>                 if (r)
>                         return r;
>
> +               offset += (last - start + 1) * AMDGPU_GPU_PAGE_SIZE;
> +               if (nodes && nodes->size == (offset >> PAGE_SHIFT)) {
> +                       offset = 0;
> +                       ++nodes;
> +               }
>                 start = last + 1;
> -               addr += max_size * AMDGPU_GPU_PAGE_SIZE;
> -       }
> +
> +       } while (likely(start != mapping->it.last + 1));
>
>         return 0;
>  }
> @@ -1130,34 +1149,24 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>         dma_addr_t *pages_addr = NULL;
>         uint32_t gtt_flags, flags;
>         struct ttm_mem_reg *mem;
> +       struct drm_mm_node *nodes;
>         struct fence *exclusive;
> -       uint64_t addr;
>         int r;
>
>         if (clear) {
>                 mem = NULL;
> -               addr = 0;
> +               nodes = NULL;
>                 exclusive = NULL;
>         } else {
>                 struct ttm_dma_tt *ttm;
>
>                 mem = &bo_va->bo->tbo.mem;
> -               addr = (u64)mem->start << PAGE_SHIFT;
> -               switch (mem->mem_type) {
> -               case TTM_PL_TT:
> +               nodes = mem->mm_node;
> +               if (mem->mem_type == TTM_PL_TT) {
>                         ttm = container_of(bo_va->bo->tbo.ttm, struct
>                                            ttm_dma_tt, ttm);
>                         pages_addr = ttm->dma_address;
> -                       break;
> -
> -               case TTM_PL_VRAM:
> -                       addr += adev->vm_manager.vram_base_offset;
> -                       break;
> -
> -               default:
> -                       break;
>                 }
> -
>                 exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
>         }
>
> @@ -1172,7 +1181,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>         list_for_each_entry(mapping, &bo_va->invalids, list) {
>                 r = amdgpu_vm_bo_split_mapping(adev, exclusive,
>                                                gtt_flags, pages_addr, vm,
> -                                              mapping, flags, addr,
> +                                              mapping, flags, nodes,
>                                                &bo_va->last_pt_update);
>                 if (r)
>                         return r;
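[FK] To make the units behind the inline comment explicit (purely illustrative, not a replacement hunk): nodes->size is counted in system pages and offset in bytes, while max_entries is consumed further down as a number of GPU page table entries. With the usual 4K GPU page size the two shift widths happen to match on 4K-page kernels, but on e.g. a 64K-page kernel the two counts differ by a factor of 16. In context the assignment would look roughly like this:

	if (nodes) {
		addr = nodes->start << PAGE_SHIFT;
		/* bytes left in this node, expressed as a GPU PTE count */
		max_entries = ((nodes->size << PAGE_SHIFT) - offset) >>
			AMDGPU_GPU_PAGE_SHIFT;
	} else {
		addr = 0;
		max_entries = S64_MAX;
	}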