[PATCH 2/2] drm/amdgpu: handle all fragment sizes v3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 31.08.2017 um 01:26 schrieb Felix Kuehling:
> One comment inline. With that fixed, this patch is Reviewed-by: Felix
> Kuehling <Felix.Kuehling at amd.com>
>
>
> On 2017-08-30 09:48 AM, Christian König wrote:
>> From: Roger He <Hongbo.He at amd.com>
>>
>> This can improve performance for some cases.
>>
>> v2 (chk): handle all sizes, simplify the patch quite a bit
>> v3 (chk): adjust dw estimation as well
>>
>> Signed-off-by: Roger He <Hongbo.He at amd.com>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>>   1 file changed, 49 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b08f031..1575657 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>>   				uint64_t start, uint64_t end,
>>   				uint64_t dst, uint64_t flags)
>>   {
>> -	int r;
>> -
>>   	/**
>>   	 * The MC L1 TLB supports variable sized pages, based on a fragment
>>   	 * field in the PTE. When this field is set to a non-zero value, page
>> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
>>   	 * Userspace can support this by aligning virtual base address and
>>   	 * allocation size to the fragment size.
>>   	 */
>> -	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
>> -	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
>> -	uint64_t frag_align = 1 << pages_per_frag;
>> +	unsigned max_frag = params->adev->vm_manager.fragment_size;
>> +	uint64_t frag_flags, frag_end;
>> +	unsigned frag;
>>   
>> -	uint64_t frag_start = ALIGN(start, frag_align);
>> -	uint64_t frag_end = end & ~(frag_align - 1);
>> +	int r;
>>   
>>   	/* system pages are non continuously */
>> -	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
>> -	    (frag_start >= frag_end))
>> +	if (params->src || !(flags & AMDGPU_PTE_VALID))
>>   		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>>   
>> -	/* handle the 4K area at the beginning */
>> -	if (start != frag_start) {
>> -		r = amdgpu_vm_update_ptes(params, start, frag_start,
>> -					  dst, flags);
>> +	/* Handle the fragments at the beginning */
>> +	while (start != end) {
>> +		/* This intentionally wraps around if no bit is set */
>> +		frag = min(ffs(start), fls64(end - start)) - 1;
>> +		if (frag >= max_frag)
>> +			break;
>> +
>> +		frag_flags = AMDGPU_PTE_FRAG(frag);
>> +		frag_end = start + (1 << frag);
>> +
>> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +					  flags | frag_flags);
>>   		if (r)
>>   			return r;
>> -		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>> +
>> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +		start = frag_end;
>>   	}
>>   
>>   	/* handle the area in the middle */
>> -	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>> -				  flags | frag_flags);
>> -	if (r)
>> -		return r;
>> +	if (start != end) {
>> +		frag_flags = AMDGPU_PTE_FRAG(max_frag);
>> +		frag_end = end & ~((1 << max_frag) - 1);
> You need a cast to uint64_t to make sure your mask is big enough and
> doesn't wipe out the high address bits:
>
>      frag_end = end & ~(uint64_t)((1 << max_frag) - 1)

Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should 
work as well, shouldn't it?

Regards,
Christian.

>
>>   
>> -	/* handle the 4K area at the end */
>> -	if (frag_end != end) {
>> -		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
>> -		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
>> +		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +					  flags | frag_flags);
>> +		if (r)
>> +			return r;
>> +
>> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +		start = frag_end;
>>   	}
>> -	return r;
>> +
>> +	/* Handle the fragments at the end */
>> +	while (start != end) {
>> +		frag = fls64(end - start) - 1;
>> +		frag_flags = AMDGPU_PTE_FRAG(frag);
>> +		frag_end = start + (1 << frag);
>> +
>> +		r = amdgpu_vm_update_ptes(params, start, frag_end,
>> +					  dst, flags | frag_flags);
>> +		if (r)
>> +			return r;
>> +
>> +		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +		start = frag_end;
>> +	}
>> +
>> +	return 0;
>>   }
>>   
>>   /**
>> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   		/* set page commands needed */
>>   		ndw += ncmds * 10;
>>   
>> -		/* two extra commands for begin/end of fragment */
>> -		ndw += 2 * 10;
>> +		/* extra commands for begin/end fragments */
>> +		ndw += 2 * 10 * adev->vm_manager.fragment_size;
>>   
>>   		params.func = amdgpu_vm_do_set_ptes;
>>   	}




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux