Am 01.09.2018 um 18:45 schrieb Kuehling, Felix: >>>> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, >>>> uint32_t mem_type = bo->tbo.mem.mem_type; >>>> >>>> if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) >>>> - list_add_tail(&bo_va->base.vm_status, &vm->evicted); >>>> + amdgpu_vm_bo_evicted(&bo_va->base); >>>> else >>>> - list_add(&bo_va->base.vm_status, &vm->idle); >>>> + amdgpu_vm_bo_idle(&bo_va->base); >>> There is a small change in behaviour here for clearing >>> bo_va->base.moved. Not sure if it matters. >> Using list_add is minimal more efficient. > What I meant was that the original code would not always set bo_va->base.moved = false (it's done a few lines above this under some conditions, but not always). The new code will always set bo_va->base.moved = false inside amdgpu_vm_bo_idle. Good point as well, but that is indeed correct. Regards, Christian. > > Regards, > Felix > > ________________________________________ > From: Christian König <ckoenig.leichtzumerken at gmail.com> > Sent: Saturday, September 1, 2018 4:16:17 AM > To: Kuehling, Felix; amd-gfx at lists.freedesktop.org > Subject: Re: [PATCH 2/2] drm/amdgpu: improve VM state machine documentation > > Am 01.09.2018 um 01:51 schrieb Felix Kuehling: >> Thanks for this. A few comments and a question inline. >> >> On 2018-08-31 09:27 AM, Christian König wrote: >>> Since we have a lot of FAQ on the VM state machine try to improve the >>> documentation by adding functions for each state move. >>> >>> Signed-off-by: Christian König <christian.koenig at amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 ++++++++++++++++++++++++--------- >>> 1 file changed, 79 insertions(+), 28 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> index a9275a99d793..40c22635fefd 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> @@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) >>> return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); >>> } >>> >>> +/** >>> + * amdgpu_vm_bo_evicted - vm_bo is evicted >>> + * >>> + * @vm_bo: vm_bo which is evicted >>> + * >>> + * State for PDs/PTs and per VM BOs which are not at the location they should >>> + * be. >>> + */ >>> +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) >>> +{ >>> + struct amdgpu_vm *vm = vm_bo->vm; >>> + struct amdgpu_bo *bo = vm_bo->bo; >>> + >>> + vm_bo->moved = true; >>> + if (bo->tbo.type == ttm_bo_type_kernel) >>> + list_move(&vm_bo->vm_status, &vm->evicted); >>> + else >>> + list_move_tail(&vm_bo->vm_status, &vm->evicted); >>> +} >>> + >>> +/** >>> + * amdgpu_vm_bo_relocated - vm_bo is reloacted >>> + * >>> + * @vm_bo: vm_bo which is relocated >>> + * >>> + * State for PDs/PTs which needs to update their parent PD. >>> + */ >>> +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) >>> +{ >>> + list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); >>> +} >>> + >>> +/** >>> + * amdgpu_vm_bo_moved - vm_bo is moved >>> + * >>> + * @vm_bo: vm_bo which is moved >>> + * >>> + * State for per VM and normal BOs which are moved, but that change is not yet >>> + * reflected in the page tables. >> I have a question here. Why does amdgpu_cs_vm_handling call >> amdgpu_vm_bo_update manually for its BO list entries? Wouldn't it be >> enough to just call amdgpu_vm_handle_moved? > No, it is still possible that the mapping of the BO was never updated > because the VM page tables where evicted. > > We still need to make sure that all BOs of the current submission are > updated correctly. > >>> + */ >>> +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) >>> +{ >>> + struct amdgpu_vm *vm = vm_bo->vm; >>> + >>> + spin_lock(&vm->moved_lock); >>> + list_move(&vm_bo->vm_status, &vm->moved); >>> + spin_unlock(&vm->moved_lock); >> If vm->moved_lock protects the moved list, do we also need to take it >> whenever something is moved from that list? > Correct, yes. > >> That could potentially be >> any list_move operation that uses vm_bo->vm_status. I found one case >> below where that may not be handled correctly. >> >>> +} >>> + >>> +/** >>> + * amdgpu_vm_bo_idle - vm_bo is idle >>> + * >>> + * @vm_bo: vm_bo which is now idle >>> + * >>> + * State for PDs/PTs and per VM BOs which have gone through the state machine >>> + * and are now idle. >>> + */ >>> +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) >>> +{ >>> + list_move(&vm_bo->vm_status, &vm_bo->vm->idle); >>> + vm_bo->moved = false; >>> +} >>> + >>> /** >>> * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm >>> * >>> @@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, >>> >>> vm->bulk_moveable = false; >>> if (bo->tbo.type == ttm_bo_type_kernel) >>> - list_move(&base->vm_status, &vm->relocated); >>> + amdgpu_vm_bo_relocated(base); >>> else >>> - list_move(&base->vm_status, &vm->idle); >>> + amdgpu_vm_bo_idle(base); >>> >>> if (bo->preferred_domains & >>> amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) >>> @@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, >>> * is currently evicted. add the bo to the evicted list to make sure it >>> * is validated on next vm use to avoid fault. >>> * */ >>> - list_move_tail(&base->vm_status, &vm->evicted); >>> - base->moved = true; >>> + amdgpu_vm_bo_evicted(base); >>> } >>> >>> /** >>> @@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, >>> break; >>> >>> if (bo->tbo.type != ttm_bo_type_kernel) { >>> - spin_lock(&vm->moved_lock); >>> - list_move(&bo_base->vm_status, &vm->moved); >>> - spin_unlock(&vm->moved_lock); >>> + amdgpu_vm_bo_moved(bo_base); >>> } else { >>> if (vm->use_cpu_for_update) >>> r = amdgpu_bo_kmap(bo, NULL); >>> @@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, >>> r = amdgpu_ttm_alloc_gart(&bo->tbo); >>> if (r) >>> break; >>> - list_move(&bo_base->vm_status, &vm->relocated); >>> + amdgpu_vm_bo_relocated(bo_base); >>> } >>> } >>> >>> @@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, >>> bo_base = list_first_entry(&vm->relocated, >>> struct amdgpu_vm_bo_base, >>> vm_status); >>> - bo_base->moved = false; >>> - list_move(&bo_base->vm_status, &vm->idle); >>> + amdgpu_vm_bo_idle(bo_base); >>> >>> bo = bo_base->bo->parent; >>> if (!bo) >>> @@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, >>> if (entry->huge) { >>> /* Add the entry to the relocated list to update it. */ >>> entry->huge = false; >>> - list_move(&entry->base.vm_status, &p->vm->relocated); >>> + amdgpu_vm_bo_relocated(&entry->base); >>> } >>> return; >>> } >>> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, >>> uint32_t mem_type = bo->tbo.mem.mem_type; >>> >>> if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) >>> - list_add_tail(&bo_va->base.vm_status, &vm->evicted); >>> + amdgpu_vm_bo_evicted(&bo_va->base); >>> else >>> - list_add(&bo_va->base.vm_status, &vm->idle); >>> + amdgpu_vm_bo_idle(&bo_va->base); >> There is a small change in behaviour here for clearing >> bo_va->base.moved. Not sure if it matters. > Using list_add is minimal more efficient. > >>> } >>> >>> list_splice_init(&bo_va->invalids, &bo_va->valids); >>> @@ -2472,28 +2531,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, >>> >>> list_for_each_entry(bo_base, &bo->va, bo_list) { >>> struct amdgpu_vm *vm = bo_base->vm; >>> - bool was_moved = bo_base->moved; >>> >>> - bo_base->moved = true; >>> if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { >>> - if (bo->tbo.type == ttm_bo_type_kernel) >>> - list_move(&bo_base->vm_status, &vm->evicted); >>> - else >>> - list_move_tail(&bo_base->vm_status, >>> - &vm->evicted); >>> + amdgpu_vm_bo_evicted(bo_base); >> I think here it's possible that the BO was on the moved list. I think >> that means amdgpu_vm_bo_evicted should take the moved_lock just in case. > Good point, probably best if I split up the moved list into per VM BOs > and all other BOs. > > That should make the locking much more clear. > > Regards, > Christian. > >> Regards, >> Felix >> >>> continue; >>> } >>> >>> - if (was_moved) >>> + if (bo_base->moved) >>> continue; >>> >>> - if (bo->tbo.type == ttm_bo_type_kernel) { >>> - list_move(&bo_base->vm_status, &vm->relocated); >>> - } else { >>> - spin_lock(&bo_base->vm->moved_lock); >>> - list_move(&bo_base->vm_status, &vm->moved); >>> - spin_unlock(&bo_base->vm->moved_lock); >>> - } >>> + bo_base->moved = true; >>> + if (bo->tbo.type == ttm_bo_type_kernel) >>> + amdgpu_vm_bo_relocated(bo_base); >>> + else >>> + amdgpu_vm_bo_moved(bo_base); >>> } >>> } >>>