[PATCH] drm/amd/amdgpu: cover fragment size between 4 and 9 when not aligned

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Roger,

I think when you can select fragment automatically, you shouldn't 
involve the vm_manager.fragment_size to calculation, then we can use the 
properest  fragment for every segment and get the best performance.

So vm_manager.fragment_size should be always be -1, if it becomes valid 
fragment value, then we should always use it and disable automatic 
selection, which should only for validation usage.


Regards,

David Zhou


On 2017å¹´08æ??30æ?¥ 13:01, Roger He wrote:
> this can get performance improvement for some cases
>
> Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7
> Signed-off-by: Roger He <Hongbo.He at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98 +++++++++++++++++++++++++++-------
>   1 file changed, 79 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 592c3e7..4e5da5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   }
>   
>   /*
> - * amdgpu_vm_frag_ptes - add fragment information to PTEs
> + * amdgpu_vm_update_ptes_helper - add fragment information to PTEs
>    *
>    * @params: see amdgpu_pte_update_params definition
>    * @vm: requested vm
> @@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>    * @end: last PTE to handle
>    * @dst: addr those PTEs should point to
>    * @flags: hw mapping flags
> + * @fragment: fragment size
>    * Returns 0 for success, -EINVAL for failure.
>    */
> -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
> -				uint64_t start, uint64_t end,
> -				uint64_t dst, uint64_t flags)
> +static int amdgpu_vm_update_ptes_helper(struct amdgpu_pte_update_params *params,
> +				  uint64_t start, uint64_t end, uint64_t dst,
> +				  uint64_t flags, int fragment)
>   {
>   	int r;
>   
> @@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>   	 * Userspace can support this by aligning virtual base address and
>   	 * allocation size to the fragment size.
>   	 */
> -	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
> -	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> -	uint64_t frag_align = 1 << pages_per_frag;
> +	uint64_t frag_flags, frag_align, frag_start, frag_end;
>   
> -	uint64_t frag_start = ALIGN(start, frag_align);
> -	uint64_t frag_end = end & ~(frag_align - 1);
> +	if (start > end || fragment < 0)
> +		return -EINVAL;
>   
> -	/* system pages are non continuously */
> -	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
> -	    (frag_start >= frag_end))
> -		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
> +	fragment = min(fragment, max(0, fls64(end - start) - 1));
> +	frag_flags = AMDGPU_PTE_FRAG(fragment);
> +	frag_align = 1 << fragment;
> +	frag_start = ALIGN(start, frag_align);
> +	frag_end = end & ~(frag_align - 1);
> +
> +	if (frag_start >= frag_end) {
> +		if (fragment <= 4)
> +			return amdgpu_vm_update_ptes(params, start, end,
> +							dst, flags);
> +		else
> +			return amdgpu_vm_update_ptes_helper(params, start,
> +					end, dst, flags, fragment - 1);
> +	}
> +
> +	if (fragment <= 4) {
> +		/* handle the 4K area at the beginning */
> +		if (start != frag_start) {
> +			r = amdgpu_vm_update_ptes(params, start, frag_start,
> +						  dst, flags);
> +			if (r)
> +				return r;
> +			dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> +		}
> +
> +		/* handle the area in the middle */
> +		r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> +					  flags | frag_flags);
> +		if (r)
> +			return r;
> +
> +		/* handle the 4K area at the end */
> +		if (frag_end != end) {
> +			dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> +			r = amdgpu_vm_update_ptes(params, frag_end, end,
> +							dst, flags);
> +		}
> +		return r;
> +	}
>   
> -	/* handle the 4K area at the beginning */
> +	/* handle the area at the beginning not aligned */
>   	if (start != frag_start) {
> -		r = amdgpu_vm_update_ptes(params, start, frag_start,
> -					  dst, flags);
> +		r = amdgpu_vm_update_ptes_helper(params, start, frag_start,
> +						dst, flags, fragment - 1);
>   		if (r)
>   			return r;
>   		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>   	}
>   
> -	/* handle the area in the middle */
> +	/* handle the area in the middle aligned*/
>   	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>   				  flags | frag_flags);
>   	if (r)
>   		return r;
>   
> -	/* handle the 4K area at the end */
> +	/* handle the area at the end not aligned */
>   	if (frag_end != end) {
>   		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> -		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
> +		r = amdgpu_vm_update_ptes_helper(params, frag_end, end,
> +						dst, flags, fragment - 1);
>   	}
>   	return r;
>   }
>   
> +/*
> + * amdgpu_vm_frag_ptes - add fragment information to PTEs
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @vm: requested vm
> + * @start: first PTE to handle
> + * @end: last PTE to handle
> + * @dst: addr those PTEs should point to
> + * @flags: hw mapping flags
> + * Returns 0 for success, -EINVAL for failure.
> + */
> +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
> +				uint64_t start, uint64_t end,
> +				uint64_t dst, uint64_t flags)
> +{
> +	int fragment = params->adev->vm_manager.fragment_size;
> +	/* system pages are non continuously */
> +	if (params->src || !(flags & AMDGPU_PTE_VALID))
> +		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
> +
> +	/* vram */
> +	return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags,
> +						fragment);
> +}
> +
>   /**
>    * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>    *



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux