[PATCH 4/4] drm/amdgpu: fill only the lower range with ATS entries

ckoenig.leichtzumerken@xxxxxxxxx (Christian König) · Fri, 26 Jan 2018 21:21:36 +0100

The amdgpu_vm_clear_bo function takes over this functionality in the 
first patch.

This patch only limits filling in the ats values in the lower halve of 
the address range (previously it was filled in the whole address space).

Regards,
Christian.

Am 26.01.2018 um 21:18 schrieb Felix Kuehling:
> Shouldn't this change come before all the reverts? Otherwise you're
> briefly breaking ATS support on Raven for KFD.
>
> Regards,
>  Â  Felix
>
>
> On 2018-01-26 05:04 AM, Christian KÃ¶nig wrote:
>> At least on x86-64 the upper range is purely used by the kernel,
>> avoid creating any ATS mappings there as security precaution and to
>> allow proper page fault reporting in the upper range.
>>
>> Signed-off-by: Christian KÃ¶nig <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 83 ++++++++++++++++++++++------------
>>   1 file changed, 54 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 14798e20abca..a3b9c3976eb3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -267,24 +267,34 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
>>    * Root PD needs to be reserved when calling this.
>>    */
>>   static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>> -			      struct amdgpu_vm *vm,
>> -			      struct amdgpu_bo *bo,
>> -			      unsigned level)
>> +			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
>> +			      unsigned level, bool pte_support_ats)
>>   {
>>   	struct ttm_operation_ctx ctx = { true, false };
>>   	struct dma_fence *fence = NULL;
>> -	uint64_t addr, init_value;
>> +	unsigned entries, ats_entries;
>> +	uint64_t addr, ats_value;
>>   	struct amdgpu_ring *ring;
>>   	struct amdgpu_job *job;
>> -	unsigned entries;
>>   	int r;
>>   
>> -	if (vm->pte_support_ats) {
>> -		init_value = AMDGPU_PTE_DEFAULT_ATC;
>> -		if (level != AMDGPU_VM_PTB)
>> -			init_value |= AMDGPU_PDE_PTE;
>> +	addr = amdgpu_bo_gpu_offset(bo);
>> +	entries = amdgpu_bo_size(bo) / 8;
>> +
>> +	if (pte_support_ats) {
>> +		if (level == adev->vm_manager.root_level) {
>> +			ats_entries = amdgpu_vm_level_shift(adev, level);
>> +			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
>> +			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
>> +			ats_entries = min(ats_entries, entries);
>> +			entries -= ats_entries;
>> +		} else {
>> +			ats_entries = entries;
>> +			entries = 0;
>> +		}
>>   	} else {
>> -		init_value = 0;
>> +		ats_entries = 0;
>> +		ats_value = 0;
>>   	}
>>   
>>   	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>> @@ -297,15 +307,26 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>>   	if (r)
>>   		goto error;
>>   
>> -	addr = amdgpu_bo_gpu_offset(bo);
>> -	entries = amdgpu_bo_size(bo) / 8;
>> -
>>   	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
>>   	if (r)
>>   		goto error;
>>   
>> -	amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>> -			      entries, 0, init_value);
>> +	if (ats_entries) {
>> +		uint64_t ats_value;
>> +
>> +		ats_value = AMDGPU_PTE_DEFAULT_ATC;
>> +		if (level != AMDGPU_VM_PTB)
>> +			ats_value |= AMDGPU_PDE_PTE;
>> +
>> +		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>> +				      ats_entries, 0, ats_value);
>> +		addr += ats_entries * 8;
>> +	}
>> +
>> +	if (entries)
>> +		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
>> +				      entries, 0, 0);
>> +
>>   	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>>   
>>   	WARN_ON(job->ibs[0].length_dw > 64);
>> @@ -339,7 +360,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>   				  struct amdgpu_vm *vm,
>>   				  struct amdgpu_vm_pt *parent,
>>   				  uint64_t saddr, uint64_t eaddr,
>> -				  unsigned level)
>> +				  unsigned level, bool ats)
>>   {
>>   	unsigned shift = amdgpu_vm_level_shift(adev, level);
>>   	unsigned pt_idx, from, to;
>> @@ -389,7 +410,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>   			if (r)
>>   				return r;
>>   
>> -			r = amdgpu_vm_clear_bo(adev, vm, pt, level);
>> +			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
>>   			if (r) {
>>   				amdgpu_bo_unref(&pt);
>>   				return r;
>> @@ -421,7 +442,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>   			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>   				((1 << shift) - 1);
>>   			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
>> -						   sub_eaddr, level);
>> +						   sub_eaddr, level, ats);
>>   			if (r)
>>   				return r;
>>   		}
>> @@ -444,26 +465,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>   			struct amdgpu_vm *vm,
>>   			uint64_t saddr, uint64_t size)
>>   {
>> -	uint64_t last_pfn;
>>   	uint64_t eaddr;
>> +	bool ats = false;
>>   
>>   	/* validate the parameters */
>>   	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
>>   		return -EINVAL;
>>   
>>   	eaddr = saddr + size - 1;
>> -	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
>> -	if (last_pfn >= adev->vm_manager.max_pfn) {
>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>> -			last_pfn, adev->vm_manager.max_pfn);
>> -		return -EINVAL;
>> -	}
>> +
>> +	if (vm->pte_support_ats)
>> +		ats = saddr < AMDGPU_VA_HOLE_START;
>>   
>>   	saddr /= AMDGPU_GPU_PAGE_SIZE;
>>   	eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>   
>> +	if (eaddr >= adev->vm_manager.max_pfn) {
>> +		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>> +			eaddr, adev->vm_manager.max_pfn);
>> +		return -EINVAL;
>> +	}
>> +
>>   	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
>> -				      adev->vm_manager.root_level);
>> +				      adev->vm_manager.root_level, ats);
>>   }
>>   
>>   /**
>> @@ -1665,16 +1689,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   			  struct dma_fence **fence)
>>   {
>>   	struct amdgpu_bo_va_mapping *mapping;
>> +	uint64_t init_pte_value = 0;
>>   	struct dma_fence *f = NULL;
>>   	int r;
>> -	uint64_t init_pte_value = 0;
>>   
>>   	while (!list_empty(&vm->freed)) {
>>   		mapping = list_first_entry(&vm->freed,
>>   			struct amdgpu_bo_va_mapping, list);
>>   		list_del(&mapping->list);
>>   
>> -		if (vm->pte_support_ats)
>> +		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
>>   			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>   
>>   		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
>> @@ -2367,7 +2391,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   		goto error_free_root;
>>   
>>   	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
>> -			       adev->vm_manager.root_level);
>> +			       adev->vm_manager.root_level,
>> +			       vm->pte_support_ats);
>>   	if (r)
>>   		goto error_unreserve;
>>