On Sun, Mar 22, 2020 at 04:48:34PM +0100, Christian König wrote:
> Cleanup amdgpu_ttm_copy_mem_to_mem by using fewer variables
> for the same value.
>
> Rename amdgpu_map_buffer to amdgpu_ttm_map_buffer, move it
> to avoid the forward declaration, clean up by moving the map
> decision into the function and add some documentation.
>
> No functional change.
>
> v2: add some more cleanup suggested by Felix
>
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 269 ++++++++++++++++----------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |   4 +-
>  2 files changed, 135 insertions(+), 138 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 665db2353a78..53de99dbaead 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -62,12 +62,6 @@
>  
>  #define AMDGPU_TTM_VRAM_MAX_DW_READ	(size_t)128
>  
> -static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
> -			     struct ttm_mem_reg *mem, unsigned num_pages,
> -			     uint64_t offset, unsigned window,
> -			     struct amdgpu_ring *ring, bool tmz,
> -			     uint64_t *addr);
> -
>  static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
>  {
>  	return 0;
> @@ -282,7 +276,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
>   *
>   */
>  static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
> -					       unsigned long *offset)
> +					       uint64_t *offset)
>  {
>  	struct drm_mm_node *mm_node = mem->mm_node;
>  
> @@ -293,6 +287,94 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
>  	return mm_node;
>  }
>  
> +/**
> + * amdgpu_ttm_map_buffer - Map memory into the GART windows
> + * @bo: buffer object to map
> + * @mem: memory object to map
> + * @mm_node: drm_mm node object to map
> + * @num_pages: number of pages to map
> + * @offset: offset into @mm_node where to start
> + * @window: which GART window to use
> + * @ring: DMA ring to use for the copy
> + * @tmz: if we should setup a TMZ enabled mapping
> + * @addr: resulting address inside the MC address space
> + *
> + * Setup one of the GART windows to access a specific piece of memory or return
> + * the physical address for local memory.
> + */
> +static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> +				 struct ttm_mem_reg *mem,
> +				 struct drm_mm_node *mm_node,
> +				 unsigned num_pages, uint64_t offset,
> +				 unsigned window, struct amdgpu_ring *ring,
> +				 bool tmz, uint64_t *addr)
> +{
> +	struct ttm_dma_tt *dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_job *job;
> +	unsigned num_dw, num_bytes;
> +	dma_addr_t *dma_address;
> +	struct dma_fence *fence;
> +	uint64_t src_addr, dst_addr;
> +	uint64_t flags;
> +	int r;
> +
> +	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
> +	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
> +
> +	/* Map only what can't be accessed directly */
> +	if (mem->start != AMDGPU_BO_INVALID_OFFSET) {
> +		*addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
> +		return 0;
> +	}
> +
> +	*addr = adev->gmc.gart_start;
> +	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
> +		AMDGPU_GPU_PAGE_SIZE;
> +	*addr += offset & ~PAGE_MASK;
> +
> +	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> +	num_bytes = num_pages * 8;
> +
> +	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
> +	if (r)
> +		return r;
> +
> +	src_addr = num_dw * 4;
> +	src_addr += job->ibs[0].gpu_addr;
> +
> +	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
> +	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
> +	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
> +				dst_addr, num_bytes, false);
> +
> +	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> +	WARN_ON(job->ibs[0].length_dw > num_dw);
> +
> +	dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
> +	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
> +	if (tmz)
> +		flags |= AMDGPU_PTE_TMZ;

Move the flags setting into amdgpu_ttm_tt_pte_flags()?

The rest looks good to me,
Reviewed-by: Huang Rui <ray.huang@xxxxxxx>
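Just to illustrate what I mean, something along these lines (completely
untested sketch; the extra "tmz" parameter is hypothetical and every
existing caller of amdgpu_ttm_tt_pte_flags() would need to be updated to
match):

	uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev,
					 struct ttm_tt *ttm,
					 struct ttm_mem_reg *mem,
					 bool tmz)
	{
		uint64_t flags = 0;

		/* ... existing PTE flag computation stays unchanged ... */

		/* Fold the TMZ handling in here instead of in the callers. */
		if (tmz)
			flags |= AMDGPU_PTE_TMZ;

		return flags;
	}

The call site here would then shrink to a single line:

		flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem, tmz);

That would keep all the PTE flag logic in one place, but it's only a
suggestion, feel free to ignore it.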
> +
> +	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
> +			    &job->ibs[0].ptr[num_dw]);
> +	if (r)
> +		goto error_free;
> +
> +	r = amdgpu_job_submit(job, &adev->mman.entity,
> +			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> +	if (r)
> +		goto error_free;
> +
> +	dma_fence_put(fence);
> +
> +	return r;
> +
> +error_free:
> +	amdgpu_job_free(job);
> +	return r;
> +}
> +
>  /**
>   * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
>   * @adev: amdgpu device
> @@ -309,79 +391,62 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
>   *
>   */
>  int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> -			       struct amdgpu_copy_mem *src,
> -			       struct amdgpu_copy_mem *dst,
> +			       const struct amdgpu_copy_mem *src,
> +			       const struct amdgpu_copy_mem *dst,
>  			       uint64_t size, bool tmz,
>  			       struct dma_resv *resv,
>  			       struct dma_fence **f)
>  {
> +	const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
> +					AMDGPU_GPU_PAGE_SIZE);
> +
> +	uint64_t src_node_size, dst_node_size, src_offset, dst_offset;
>  	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>  	struct drm_mm_node *src_mm, *dst_mm;
> -	uint64_t src_node_start, dst_node_start, src_node_size,
> -		 dst_node_size, src_page_offset, dst_page_offset;
>  	struct dma_fence *fence = NULL;
>  	int r = 0;
> -	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
> -					AMDGPU_GPU_PAGE_SIZE);
>  
>  	if (!adev->mman.buffer_funcs_enabled) {
>  		DRM_ERROR("Trying to move memory with ring turned off.\n");
>  		return -EINVAL;
>  	}
>  
> -	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
> -	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> -			 src->offset;
> -	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> -	src_page_offset = src_node_start & (PAGE_SIZE - 1);
> +	src_offset = src->offset;
> +	src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
> +	src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
>  
> -	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
> -	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> -			 dst->offset;
> -	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> -	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
> +	dst_offset = dst->offset;
> +	dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
> +	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
>  
>  	mutex_lock(&adev->mman.gtt_window_lock);
>  
>  	while (size) {
> -		unsigned long cur_size;
> -		uint64_t from = src_node_start, to = dst_node_start;
> +		uint32_t src_page_offset = src_offset & ~PAGE_MASK;
> +		uint32_t dst_page_offset = dst_offset & ~PAGE_MASK;
>  		struct dma_fence *next;
> +		uint32_t cur_size;
> +		uint64_t from, to;
>  
>  		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
>  		 * begins at an offset, then adjust the size accordingly
>  		 */
> -		cur_size = min3(min(src_node_size, dst_node_size), size,
> -				GTT_MAX_BYTES);
> -		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
> -		    cur_size + dst_page_offset > GTT_MAX_BYTES)
> -			cur_size -= max(src_page_offset, dst_page_offset);
> -
> -		/* Map only what needs to be accessed. Map src to window 0 and
> -		 * dst to window 1
> -		 */
> -		if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) {
> -			r = amdgpu_map_buffer(src->bo, src->mem,
> -					      PFN_UP(cur_size + src_page_offset),
> -					      src_node_start, 0, ring, tmz,
> -					      &from);
> -			if (r)
> -				goto error;
> -			/* Adjust the offset because amdgpu_map_buffer returns
> -			 * start of mapped page
> -			 */
> -			from += src_page_offset;
> -		}
> +		cur_size = min3(src_node_size, dst_node_size, size);
> +		cur_size = min(GTT_MAX_BYTES - src_page_offset, cur_size);
> +		cur_size = min(GTT_MAX_BYTES - dst_page_offset, cur_size);
> +
> +		/* Map src to window 0 and dst to window 1. */
> +		r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm,
> +					  PFN_UP(cur_size + src_page_offset),
> +					  src_offset, 0, ring, tmz, &from);
> +		if (r)
> +			goto error;
>  
> -		if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) {
> -			r = amdgpu_map_buffer(dst->bo, dst->mem,
> -					      PFN_UP(cur_size + dst_page_offset),
> -					      dst_node_start, 1, ring, tmz,
> -					      &to);
> -			if (r)
> -				goto error;
> -			to += dst_page_offset;
> -		}
> +		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm,
> +					  PFN_UP(cur_size + dst_page_offset),
> +					  dst_offset, 1, ring, tmz, &to);
> +		if (r)
> +			goto error;
>  
>  		r = amdgpu_copy_buffer(ring, from, to, cur_size,
>  				       resv, &next, false, true, tmz);
> @@ -397,23 +462,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>  
>  		src_node_size -= cur_size;
>  		if (!src_node_size) {
> -			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
> -							     src->mem);
> -			src_node_size = (src_mm->size << PAGE_SHIFT);
> -			src_page_offset = 0;
> +			++src_mm;
> +			src_node_size = src_mm->size << PAGE_SHIFT;
> +			src_offset = 0;
>  		} else {
> -			src_node_start += cur_size;
> -			src_page_offset = src_node_start & (PAGE_SIZE - 1);
> +			src_offset += cur_size;
>  		}
> +
>  		dst_node_size -= cur_size;
>  		if (!dst_node_size) {
> -			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
> -							     dst->mem);
> -			dst_node_size = (dst_mm->size << PAGE_SHIFT);
> -			dst_page_offset = 0;
> +			++dst_mm;
> +			dst_node_size = dst_mm->size << PAGE_SHIFT;
> +			dst_offset = 0;
>  		} else {
> -			dst_node_start += cur_size;
> -			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
> +			dst_offset += cur_size;
>  		}
>  	}
>  error:
> @@ -754,8 +816,8 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
>  static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>  					   unsigned long page_offset)
>  {
> +	uint64_t offset = (page_offset << PAGE_SHIFT);
>  	struct drm_mm_node *mm;
> -	unsigned long offset = (page_offset << PAGE_SHIFT);
>  
>  	mm = amdgpu_find_mm_node(&bo->mem, &offset);
>  	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
> @@ -1606,8 +1668,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>  	if (bo->mem.mem_type != TTM_PL_VRAM)
>  		return -EIO;
>  
> -	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
> -	pos = (nodes->start << PAGE_SHIFT) + offset;
> +	pos = offset;
> +	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos);
> +	pos += (nodes->start << PAGE_SHIFT);
>  
>  	while (len && pos < adev->gmc.mc_vram_size) {
>  		uint64_t aligned_pos = pos & ~(uint64_t)3;
> @@ -2033,72 +2096,6 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
>  	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
>  }
>  
> -static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
> -			     struct ttm_mem_reg *mem, unsigned num_pages,
> -			     uint64_t offset, unsigned window,
> -			     struct amdgpu_ring *ring, bool tmz,
> -			     uint64_t *addr)
> -{
> -	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
> -	struct amdgpu_device *adev = ring->adev;
> -	struct ttm_tt *ttm = bo->ttm;
> -	struct amdgpu_job *job;
> -	unsigned num_dw, num_bytes;
> -	dma_addr_t *dma_address;
> -	struct dma_fence *fence;
> -	uint64_t src_addr, dst_addr;
> -	uint64_t flags;
> -	int r;
> -
> -	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
> -	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
> -
> -	*addr = adev->gmc.gart_start;
> -	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
> -		AMDGPU_GPU_PAGE_SIZE;
> -
> -	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> -	num_bytes = num_pages * 8;
> -
> -	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
> -	if (r)
> -		return r;
> -
> -	src_addr = num_dw * 4;
> -	src_addr += job->ibs[0].gpu_addr;
> -
> -	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
> -	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
> -	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
> -				dst_addr, num_bytes, false);
> -
> -	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> -	WARN_ON(job->ibs[0].length_dw > num_dw);
> -
> -	dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
> -	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
> -	if (tmz)
> -		flags |= AMDGPU_PTE_TMZ;
> -
> -	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
> -			    &job->ibs[0].ptr[num_dw]);
> -	if (r)
> -		goto error_free;
> -
> -	r = amdgpu_job_submit(job, &adev->mman.entity,
> -			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> -	if (r)
> -		goto error_free;
> -
> -	dma_fence_put(fence);
> -
> -	return r;
> -
> -error_free:
> -	amdgpu_job_free(job);
> -	return r;
> -}
> -
>  int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>  		       uint64_t dst_offset, uint32_t byte_count,
>  		       struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 21182caade21..11c0e79e7106 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -89,8 +89,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>  		       struct dma_fence **fence, bool direct_submit,
>  		       bool vm_needs_flush, bool tmz);
>  int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> -			       struct amdgpu_copy_mem *src,
> -			       struct amdgpu_copy_mem *dst,
> +			       const struct amdgpu_copy_mem *src,
> +			       const struct amdgpu_copy_mem *dst,
>  			       uint64_t size, bool tmz,
>  			       struct dma_resv *resv,
>  			       struct dma_fence **f);
> --
> 2.14.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx