Am 31.08.2017 um 00:50 schrieb Felix Kuehling: > Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com> > > Some more thoughts inline, but nothing that should be addressed in this > change. > > Regards, > Felix > > > On 2017-08-30 11:00 AM, Christian König wrote: >> From: Christian König <christian.koenig at amd.com> >> >> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't >> need to be specified in the BO lists. >> >> v2: validate PDs/PTs first >> >> Signed-off-by: Christian König <christian.koenig at amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++---------- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ++- >> 3 files changed, 60 insertions(+), 26 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> index f68ac56..48e18cc 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) >> >> } >> >> - r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync); >> + r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync); >> >> if (amdgpu_vm_debug && p->bo_list) { >> /* Invalidate all BOs to test for userspace bugs */ >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 4cdfb70..6cd20e7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, >> spin_unlock(&glob->lru_lock); >> } >> >> - if (vm->use_cpu_for_update) { >> + if (bo->tbo.type == ttm_bo_type_kernel && >> + vm->use_cpu_for_update) { >> r = amdgpu_bo_kmap(bo, NULL); >> if (r) >> return r; >> } >> >> spin_lock(&vm->status_lock); >> - list_move(&bo_base->vm_status, &vm->relocated); >> + if (bo->tbo.type != ttm_bo_type_kernel) >> + list_move(&bo_base->vm_status, &vm->moved); >> + else >> + list_move(&bo_base->vm_status, &vm->relocated); >> } >> spin_unlock(&vm->status_lock); >> >> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, >> } >> >> /** >> - * amdgpu_vm_clear_moved - clear moved BOs in the PT >> + * amdgpu_vm_handle_moved - handle moved BOs in the PT >> * >> * @adev: amdgpu_device pointer >> * @vm: requested vm >> + * @sync: sync object to add fences to >> * >> - * Make sure all moved BOs are cleared in the PT. >> + * Make sure all BOs which are moved are updated in the PTs. >> * Returns 0 for success. >> * >> - * PTs have to be reserved and mutex must be locked! >> + * PTs have to be reserved! >> */ >> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, >> - struct amdgpu_sync *sync) >> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev, >> + struct amdgpu_vm *vm, >> + struct amdgpu_sync *sync) >> { >> struct amdgpu_bo_va *bo_va = NULL; >> + bool clear; >> int r = 0; >> >> spin_lock(&vm->status_lock); >> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, >> struct amdgpu_bo_va, base.vm_status); >> spin_unlock(&vm->status_lock); >> >> - r = amdgpu_vm_bo_update(adev, bo_va, true); >> + /* Per VM BOs never need to bo cleared in the page tables */ >> + clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv; >> + >> + r = amdgpu_vm_bo_update(adev, bo_va, clear); > Just thinking out loud: Per VM BOs don't need to be cleared because they > have separate lists for evicted and moved (but valid) BOs. Actually the reason is that we know they are reserved together with the page directory. > If we had a > similar thing for non-VM BOs (but without the automatic validation), > this case could be optimized. Then moved but valid BOs could be updated > in the page table instead of clearing them here and updating the page > table again later. Well, that won't work because other BOs can change their location while we try to use their location. But what I'm speculating about for a while now is to use trylock on the BOs here. That might not work all the time, but clearly most of the time. Going to give that a try now, Christian. > >> if (r) >> return r; >> >> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, >> return bo_va; >> } >> >> + >> +/** >> + * amdgpu_vm_bo_insert_mapping - insert a new mapping >> + * >> + * @adev: amdgpu_device pointer >> + * @bo_va: bo_va to store the address >> + * @mapping: the mapping to insert >> + * >> + * Insert a new mapping into all structures. >> + */ >> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, >> + struct amdgpu_bo_va *bo_va, >> + struct amdgpu_bo_va_mapping *mapping) >> +{ >> + struct amdgpu_vm *vm = bo_va->base.vm; >> + struct amdgpu_bo *bo = bo_va->base.bo; >> + >> + list_add(&mapping->list, &bo_va->invalids); >> + amdgpu_vm_it_insert(mapping, &vm->va); >> + >> + if (mapping->flags & AMDGPU_PTE_PRT) >> + amdgpu_vm_prt_get(adev); >> + >> + if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { >> + spin_lock(&vm->status_lock); >> + list_move(&bo_va->base.vm_status, &vm->moved); >> + spin_unlock(&vm->status_lock); >> + } >> + trace_amdgpu_vm_bo_map(bo_va, mapping); >> +} >> + >> /** >> * amdgpu_vm_bo_map - map bo inside a vm >> * >> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, >> if (!mapping) >> return -ENOMEM; >> >> - INIT_LIST_HEAD(&mapping->list); >> mapping->start = saddr; >> mapping->last = eaddr; >> mapping->offset = offset; >> mapping->flags = flags; >> >> - list_add(&mapping->list, &bo_va->invalids); >> - amdgpu_vm_it_insert(mapping, &vm->va); >> - >> - if (flags & AMDGPU_PTE_PRT) >> - amdgpu_vm_prt_get(adev); >> - trace_amdgpu_vm_bo_map(bo_va, mapping); >> + amdgpu_vm_bo_insert_map(adev, bo_va, mapping); >> >> return 0; >> } >> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, >> { >> struct amdgpu_bo_va_mapping *mapping; >> struct amdgpu_bo *bo = bo_va->base.bo; >> - struct amdgpu_vm *vm = bo_va->base.vm; >> uint64_t eaddr; >> int r; >> >> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, >> mapping->offset = offset; >> mapping->flags = flags; >> >> - list_add(&mapping->list, &bo_va->invalids); >> - amdgpu_vm_it_insert(mapping, &vm->va); >> - >> - if (flags & AMDGPU_PTE_PRT) >> - amdgpu_vm_prt_get(adev); >> - trace_amdgpu_vm_bo_map(bo_va, mapping); >> + amdgpu_vm_bo_insert_map(adev, bo_va, mapping); >> >> return 0; >> } >> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, >> bo_base->moved = true; >> if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { >> spin_lock(&bo_base->vm->status_lock); >> - list_move(&bo_base->vm_status, &vm->evicted); >> + if (bo->tbo.type == ttm_bo_type_kernel) >> + list_move(&bo_base->vm_status, &vm->evicted); >> + else >> + list_move_tail(&bo_base->vm_status, >> + &vm->evicted); >> spin_unlock(&bo_base->vm->status_lock); >> continue; >> } >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> index c3753af..90b7741 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, >> int amdgpu_vm_clear_freed(struct amdgpu_device *adev, >> struct amdgpu_vm *vm, >> struct dma_fence **fence); >> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, >> - struct amdgpu_sync *sync); >> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev, >> + struct amdgpu_vm *vm, >> + struct amdgpu_sync *sync); >> int amdgpu_vm_bo_update(struct amdgpu_device *adev, >> struct amdgpu_bo_va *bo_va, >> bool clear); > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx