[PATCH 2/4] drm/amdgpu: add support for per VM BOs v2

deathsimple@xxxxxxxxxxx (Christian König) · Thu, 31 Aug 2017 14:20:58 +0200

Am 31.08.2017 um 00:50 schrieb Felix Kuehling:
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>
> Some more thoughts inline, but nothing that should be addressed in this
> change.
>
> Regards,
>    Felix
>
>
> On 2017-08-30 11:00 AM, Christian KÃ¶nig wrote:
>> From: Christian KÃ¶nig <christian.koenig at amd.com>
>>
>> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
>> need to be specified in the BO lists.
>>
>> v2: validate PDs/PTs first
>>
>> Signed-off-by: Christian KÃ¶nig <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>>   3 files changed, 60 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index f68ac56..48e18cc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>>   
>>   	}
>>   
>> -	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
>> +	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>>   
>>   	if (amdgpu_vm_debug && p->bo_list) {
>>   		/* Invalidate all BOs to test for userspace bugs */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 4cdfb70..6cd20e7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			spin_unlock(&glob->lru_lock);
>>   		}
>>   
>> -		if (vm->use_cpu_for_update) {
>> +		if (bo->tbo.type == ttm_bo_type_kernel &&
>> +		    vm->use_cpu_for_update) {
>>   			r = amdgpu_bo_kmap(bo, NULL);
>>   			if (r)
>>   				return r;
>>   		}
>>   
>>   		spin_lock(&vm->status_lock);
>> -		list_move(&bo_base->vm_status, &vm->relocated);
>> +		if (bo->tbo.type != ttm_bo_type_kernel)
>> +			list_move(&bo_base->vm_status, &vm->moved);
>> +		else
>> +			list_move(&bo_base->vm_status, &vm->relocated);
>>   	}
>>   	spin_unlock(&vm->status_lock);
>>   
>> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   }
>>   
>>   /**
>> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
>> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>>    *
>>    * @adev: amdgpu_device pointer
>>    * @vm: requested vm
>> + * @sync: sync object to add fences to
>>    *
>> - * Make sure all moved BOs are cleared in the PT.
>> + * Make sure all BOs which are moved are updated in the PTs.
>>    * Returns 0 for success.
>>    *
>> - * PTs have to be reserved and mutex must be locked!
>> + * PTs have to be reserved!
>>    */
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -			    struct amdgpu_sync *sync)
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +			   struct amdgpu_vm *vm,
>> +			   struct amdgpu_sync *sync)
>>   {
>>   	struct amdgpu_bo_va *bo_va = NULL;
>> +	bool clear;
>>   	int r = 0;
>>   
>>   	spin_lock(&vm->status_lock);
>> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			struct amdgpu_bo_va, base.vm_status);
>>   		spin_unlock(&vm->status_lock);
>>   
>> -		r = amdgpu_vm_bo_update(adev, bo_va, true);
>> +		/* Per VM BOs never need to bo cleared in the page tables */
>> +		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
>> +
>> +		r = amdgpu_vm_bo_update(adev, bo_va, clear);
> Just thinking out loud: Per VM BOs don't need to be cleared because they
> have separate lists for evicted and moved (but valid) BOs.

Actually the reason is that we know they are reserved together with the 
page directory.

> If we had a
> similar thing for non-VM BOs (but without the automatic validation),
> this case could be optimized. Then moved but valid BOs could be updated
> in the page table instead of clearing them here and updating the page
> table again later.

Well, that won't work because other BOs can change their location while 
we try to use their location.

But what I'm speculating about for a while now is to use trylock on the 
BOs here. That might not work all the time, but clearly most of the time.

Going to give that a try now,
Christian.

>
>>   		if (r)
>>   			return r;
>>   
>> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>>   	return bo_va;
>>   }
>>   
>> +
>> +/**
>> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @bo_va: bo_va to store the address
>> + * @mapping: the mapping to insert
>> + *
>> + * Insert a new mapping into all structures.
>> + */
>> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>> +				    struct amdgpu_bo_va *bo_va,
>> +				    struct amdgpu_bo_va_mapping *mapping)
>> +{
>> +	struct amdgpu_vm *vm = bo_va->base.vm;
>> +	struct amdgpu_bo *bo = bo_va->base.bo;
>> +
>> +	list_add(&mapping->list, &bo_va->invalids);
>> +	amdgpu_vm_it_insert(mapping, &vm->va);
>> +
>> +	if (mapping->flags & AMDGPU_PTE_PRT)
>> +		amdgpu_vm_prt_get(adev);
>> +
>> +	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> +		spin_lock(&vm->status_lock);
>> +		list_move(&bo_va->base.vm_status, &vm->moved);
>> +		spin_unlock(&vm->status_lock);
>> +	}
>> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +}
>> +
>>   /**
>>    * amdgpu_vm_bo_map - map bo inside a vm
>>    *
>> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>   	if (!mapping)
>>   		return -ENOMEM;
>>   
>> -	INIT_LIST_HEAD(&mapping->list);
>>   	mapping->start = saddr;
>>   	mapping->last = eaddr;
>>   	mapping->offset = offset;
>>   	mapping->flags = flags;
>>   
>> -	list_add(&mapping->list, &bo_va->invalids);
>> -	amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -	if (flags & AMDGPU_PTE_PRT)
>> -		amdgpu_vm_prt_get(adev);
>> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>   
>>   	return 0;
>>   }
>> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>   {
>>   	struct amdgpu_bo_va_mapping *mapping;
>>   	struct amdgpu_bo *bo = bo_va->base.bo;
>> -	struct amdgpu_vm *vm = bo_va->base.vm;
>>   	uint64_t eaddr;
>>   	int r;
>>   
>> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>   	mapping->offset = offset;
>>   	mapping->flags = flags;
>>   
>> -	list_add(&mapping->list, &bo_va->invalids);
>> -	amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -	if (flags & AMDGPU_PTE_PRT)
>> -		amdgpu_vm_prt_get(adev);
>> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>   
>>   	return 0;
>>   }
>> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>   		bo_base->moved = true;
>>   		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>   			spin_lock(&bo_base->vm->status_lock);
>> -			list_move(&bo_base->vm_status, &vm->evicted);
>> +			if (bo->tbo.type == ttm_bo_type_kernel)
>> +				list_move(&bo_base->vm_status, &vm->evicted);
>> +			else
>> +				list_move_tail(&bo_base->vm_status,
>> +					       &vm->evicted);
>>   			spin_unlock(&bo_base->vm->status_lock);
>>   			continue;
>>   		}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index c3753af..90b7741 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   			  struct amdgpu_vm *vm,
>>   			  struct dma_fence **fence);
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -			  struct amdgpu_sync *sync);
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +			   struct amdgpu_vm *vm,
>> +			   struct amdgpu_sync *sync);
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   			struct amdgpu_bo_va *bo_va,
>>   			bool clear);
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx