Am 31.08.2017 um 01:26 schrieb Felix Kuehling: > One comment inline. With that fixed, this patch is Reviewed-by: Felix > Kuehling <Felix.Kuehling at amd.com> > > > On 2017-08-30 09:48 AM, Christian König wrote: >> From: Roger He <Hongbo.He at amd.com> >> >> This can improve performance for some cases. >> >> v2 (chk): handle all sizes, simplify the patch quite a bit >> v3 (chk): adjust dw estimation as well >> >> Signed-off-by: Roger He <Hongbo.He at amd.com> >> Signed-off-by: Christian König <christian.koenig at amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------ >> 1 file changed, 49 insertions(+), 25 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index b08f031..1575657 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, >> uint64_t start, uint64_t end, >> uint64_t dst, uint64_t flags) >> { >> - int r; >> - >> /** >> * The MC L1 TLB supports variable sized pages, based on a fragment >> * field in the PTE. When this field is set to a non-zero value, page >> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, >> * Userspace can support this by aligning virtual base address and >> * allocation size to the fragment size. >> */ >> - unsigned pages_per_frag = params->adev->vm_manager.fragment_size; >> - uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); >> - uint64_t frag_align = 1 << pages_per_frag; >> + unsigned max_frag = params->adev->vm_manager.fragment_size; >> + uint64_t frag_flags, frag_end; >> + unsigned frag; >> >> - uint64_t frag_start = ALIGN(start, frag_align); >> - uint64_t frag_end = end & ~(frag_align - 1); >> + int r; >> >> /* system pages are non continuously */ >> - if (params->src || !(flags & AMDGPU_PTE_VALID) || >> - (frag_start >= frag_end)) >> + if (params->src || !(flags & AMDGPU_PTE_VALID)) >> return amdgpu_vm_update_ptes(params, start, end, dst, flags); >> >> - /* handle the 4K area at the beginning */ >> - if (start != frag_start) { >> - r = amdgpu_vm_update_ptes(params, start, frag_start, >> - dst, flags); >> + /* Handle the fragments at the beginning */ >> + while (start != end) { >> + /* This intentionally wraps around if no bit is set */ >> + frag = min(ffs(start), fls64(end - start)) - 1; >> + if (frag >= max_frag) >> + break; >> + >> + frag_flags = AMDGPU_PTE_FRAG(frag); >> + frag_end = start + (1 << frag); >> + >> + r = amdgpu_vm_update_ptes(params, start, frag_end, dst, >> + flags | frag_flags); >> if (r) >> return r; >> - dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; >> + >> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; >> + start = frag_end; >> } >> >> /* handle the area in the middle */ >> - r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, >> - flags | frag_flags); >> - if (r) >> - return r; >> + if (start != end) { >> + frag_flags = AMDGPU_PTE_FRAG(max_frag); >> + frag_end = end & ~((1 << max_frag) - 1); > You need a cast to uint64_t to make sure your mask is big enough and > doesn't wipe out the high address bits: > > frag_end = end & ~(uint64_t)((1 << max_frag) - 1) Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should work as well, shouldn't it? Regards, Christian. > >> >> - /* handle the 4K area at the end */ >> - if (frag_end != end) { >> - dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; >> - r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); >> + r = amdgpu_vm_update_ptes(params, start, frag_end, dst, >> + flags | frag_flags); >> + if (r) >> + return r; >> + >> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; >> + start = frag_end; >> } >> - return r; >> + >> + /* Handle the fragments at the end */ >> + while (start != end) { >> + frag = fls64(end - start) - 1; >> + frag_flags = AMDGPU_PTE_FRAG(frag); >> + frag_end = start + (1 << frag); >> + >> + r = amdgpu_vm_update_ptes(params, start, frag_end, >> + dst, flags | frag_flags); >> + if (r) >> + return r; >> + >> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; >> + start = frag_end; >> + } >> + >> + return 0; >> } >> >> /** >> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >> /* set page commands needed */ >> ndw += ncmds * 10; >> >> - /* two extra commands for begin/end of fragment */ >> - ndw += 2 * 10; >> + /* extra commands for begin/end fragments */ >> + ndw += 2 * 10 * adev->vm_manager.fragment_size; >> >> params.func = amdgpu_vm_do_set_ptes; >> }