this can improve performance in some cases Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7 Signed-off-by: Roger He <Hongbo.He at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98 +++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 592c3e7..4e5da5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, } /* - * amdgpu_vm_frag_ptes - add fragment information to PTEs + * amdgpu_vm_update_ptes_helper - add fragment information to PTEs * * @params: see amdgpu_pte_update_params definition * @vm: requested vm * @@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @end: last PTE to handle * @dst: addr those PTEs should point to * @flags: hw mapping flags + * @fragment: fragment size * Returns 0 for success, -EINVAL for failure. */ -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) +static int amdgpu_vm_update_ptes_helper(struct amdgpu_pte_update_params *params, + uint64_t start, uint64_t end, uint64_t dst, + uint64_t flags, int fragment) { int r; @@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * Userspace can support this by aligning virtual base address and * allocation size to the fragment size. 
*/ - unsigned pages_per_frag = params->adev->vm_manager.fragment_size; - uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); - uint64_t frag_align = 1 << pages_per_frag; + uint64_t frag_flags, frag_align, frag_start, frag_end; - uint64_t frag_start = ALIGN(start, frag_align); - uint64_t frag_end = end & ~(frag_align - 1); + if (start > end || fragment < 0) + return -EINVAL; - /* system pages are non continuously */ - if (params->src || !(flags & AMDGPU_PTE_VALID) || - (frag_start >= frag_end)) - return amdgpu_vm_update_ptes(params, start, end, dst, flags); + fragment = min(fragment, max(0, fls64(end - start) - 1)); + frag_flags = AMDGPU_PTE_FRAG(fragment); + frag_align = 1 << fragment; + frag_start = ALIGN(start, frag_align); + frag_end = end & ~(frag_align - 1); + + if (frag_start >= frag_end) { + if (fragment <= 4) + return amdgpu_vm_update_ptes(params, start, end, + dst, flags); + else + return amdgpu_vm_update_ptes_helper(params, start, + end, dst, flags, fragment - 1); + } + + if (fragment <= 4) { + /* handle the 4K area at the beginning */ + if (start != frag_start) { + r = amdgpu_vm_update_ptes(params, start, frag_start, + dst, flags); + if (r) + return r; + dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; + } + + /* handle the area in the middle */ + r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, + flags | frag_flags); + if (r) + return r; + + /* handle the 4K area at the end */ + if (frag_end != end) { + dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; + r = amdgpu_vm_update_ptes(params, frag_end, end, + dst, flags); + } + return r; + } - /* handle the 4K area at the beginning */ + /* handle the area at the beginning not aligned */ if (start != frag_start) { - r = amdgpu_vm_update_ptes(params, start, frag_start, - dst, flags); + r = amdgpu_vm_update_ptes_helper(params, start, frag_start, + dst, flags, fragment - 1); if (r) return r; dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; } - /* handle the area in the middle */ + /* 
handle the aligned area in the middle */ r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, flags | frag_flags); if (r) return r; - /* handle the 4K area at the end */ + /* handle the unaligned area at the end */ if (frag_end != end) { dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; - r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); + r = amdgpu_vm_update_ptes_helper(params, frag_end, end, + dst, flags, fragment - 1); } return r; } +/* + * amdgpu_vm_frag_ptes - add fragment information to PTEs + * + * @params: see amdgpu_pte_update_params definition + * @vm: requested vm + * @start: first PTE to handle + * @end: last PTE to handle + * @dst: addr those PTEs should point to + * @flags: hw mapping flags + * Returns 0 for success, -EINVAL for failure. + */ +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) +{ + int fragment = params->adev->vm_manager.fragment_size; + /* system pages are not contiguous */ + if (params->src || !(flags & AMDGPU_PTE_VALID)) + return amdgpu_vm_update_ptes(params, start, end, dst, flags); + + /* vram */ + return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags, + fragment); +} + /** * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * -- 2.7.4