Hi Roger, I think that when you can select the fragment size automatically, you shouldn't involve vm_manager.fragment_size in the calculation; then we can use the most appropriate fragment for every segment and get the best performance. So vm_manager.fragment_size should always be -1. If it is set to a valid fragment value, then we should always use it and disable automatic selection — that setting should only be used for validation purposes. Regards, David Zhou On 2017-08-30 13:01, Roger He wrote:
> */ > -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, > - uint64_t start, uint64_t end, > - uint64_t dst, uint64_t flags) > +static int amdgpu_vm_update_ptes_helper(struct amdgpu_pte_update_params *params, > + uint64_t start, uint64_t end, uint64_t dst, > + uint64_t flags, int fragment) > { > int r; > > @@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, > * Userspace can support this by aligning virtual base address and > * allocation size to the fragment size. > */ > - unsigned pages_per_frag = params->adev->vm_manager.fragment_size; > - uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); > - uint64_t frag_align = 1 << pages_per_frag; > + uint64_t frag_flags, frag_align, frag_start, frag_end; > > - uint64_t frag_start = ALIGN(start, frag_align); > - uint64_t frag_end = end & ~(frag_align - 1); > + if (start > end || fragment < 0) > + return -EINVAL; > > - /* system pages are non continuously */ > - if (params->src || !(flags & AMDGPU_PTE_VALID) || > - (frag_start >= frag_end)) > - return amdgpu_vm_update_ptes(params, start, end, dst, flags); > + fragment = min(fragment, max(0, fls64(end - start) - 1)); > + frag_flags = AMDGPU_PTE_FRAG(fragment); > + frag_align = 1 << fragment; > + frag_start = ALIGN(start, frag_align); > + frag_end = end & ~(frag_align - 1); > + > + if (frag_start >= frag_end) { > + if (fragment <= 4) > + return amdgpu_vm_update_ptes(params, start, end, > + dst, flags); > + else > + return amdgpu_vm_update_ptes_helper(params, start, > + end, dst, flags, fragment - 1); > + } > + > + if (fragment <= 4) { > + /* handle the 4K area at the beginning */ > + if (start != frag_start) { > + r = amdgpu_vm_update_ptes(params, start, frag_start, > + dst, flags); > + if (r) > + return r; > + dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; > + } > + > + /* handle the area in the middle */ > + r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, > + flags | frag_flags); > + if 
(r) > + return r; > + > + /* handle the 4K area at the end */ > + if (frag_end != end) { > + dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; > + r = amdgpu_vm_update_ptes(params, frag_end, end, > + dst, flags); > + } > + return r; > + } > > - /* handle the 4K area at the beginning */ > + /* handle the area at the beginning not aligned */ > if (start != frag_start) { > - r = amdgpu_vm_update_ptes(params, start, frag_start, > - dst, flags); > + r = amdgpu_vm_update_ptes_helper(params, start, frag_start, > + dst, flags, fragment - 1); > if (r) > return r; > dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; > } > > - /* handle the area in the middle */ > + /* handle the area in the middle aligned*/ > r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, > flags | frag_flags); > if (r) > return r; > > - /* handle the 4K area at the end */ > + /* handle the area at the end not aligned */ > if (frag_end != end) { > dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; > - r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); > + r = amdgpu_vm_update_ptes_helper(params, frag_end, end, > + dst, flags, fragment - 1); > } > return r; > } > > +/* > + * amdgpu_vm_frag_ptes - add fragment information to PTEs > + * > + * @params: see amdgpu_pte_update_params definition > + * @vm: requested vm > + * @start: first PTE to handle > + * @end: last PTE to handle > + * @dst: addr those PTEs should point to > + * @flags: hw mapping flags > + * Returns 0 for success, -EINVAL for failure. 
> + */ > +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, > + uint64_t start, uint64_t end, > + uint64_t dst, uint64_t flags) > +{ > + int fragment = params->adev->vm_manager.fragment_size; > + /* system pages are non continuously */ > + if (params->src || !(flags & AMDGPU_PTE_VALID)) > + return amdgpu_vm_update_ptes(params, start, end, dst, flags); > + > + /* vram */ > + return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags, > + fragment); > +} > + > /** > * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table > *