> 2020年2月19日 07:10,Kuehling, Felix <Felix.Kuehling@xxxxxxx> 写道: > > Hi Xinhui, > > Two suggestions inline. Looks good to me otherwise. > > On 2020-02-17 10:36 p.m., xinhui pan wrote: >> No need to trigger eviction as the memory mapping will not be used >> anymore. >> >> All pt/pd bos share the same resv, hence the same shared eviction fence. >> Every time a page table is freed, the fence will be signaled and that causes >> unexpected kfd evictions. >> >> Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx> >> CC: Christian König <christian.koenig@xxxxxxx> >> CC: Felix Kuehling <felix.kuehling@xxxxxxx> >> CC: Alex Deucher <alexander.deucher@xxxxxxx> >> --- >> change from v4: >> based on new ttm code. >> >> change from v3: >> fix a coding error >> >> change from v2: >> based on Chris' drm/ttm: rework BO delayed delete patchset. >> >> --- >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + >> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 37 +++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++ >> 3 files changed, 42 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> index 9e8db702d878..0ee8aae6c519 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> @@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, >> struct mm_struct *mm); >> bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); >> struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); >> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); >> struct amdkfd_process_info { >> /* List head of all VMs that belong to a KFD process */ >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> index ef721cb65868..6aa20aa82bd3 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> @@ -276,6 +276,41 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, >> return 0; >> } >> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) >> +{ >> + struct amdgpu_bo *root = bo; >> + struct amdgpu_vm_bo_base *vm_bo; >> + struct amdgpu_vm *vm; >> + struct amdkfd_process_info *info; >> + struct amdgpu_amdkfd_fence *ef; >> + int ret; >> + >> + while (root->parent) >> + root = root->parent; > > This should not be necessary. Every page table BO has a pointer to a vm_bo that has a pointer to the vm. So you don't need to find the root. > > This should do the trick: > > if (!bo->vm_bo || !bo->vm_bo->vm) > return 0; > vm = bo->vm_bo->vm; > > Well, when freeing page tables, it clears bo->vm_bo first and then releases the pt/pd bo. We could also change the sequence as I did in V2, but that seems to hit some weird issues. >> + >> + vm_bo = root->vm_bo; >> + if (!vm_bo) >> + return 0; >> + >> + vm = vm_bo->vm; >> + if (!vm) >> + return 0; >> + >> + info = vm->process_info; >> + if (!info || !info->eviction_fence) >> + return 0; >> + >> + ef = container_of(dma_fence_get(&info->eviction_fence->base), >> + struct amdgpu_amdkfd_fence, base); >> + >> + dma_resv_lock(bo->tbo.base.resv, NULL); >> + ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef); >> + dma_resv_unlock(bo->tbo.base.resv); >> + >> + dma_fence_put(&ef->base); >> + return ret; >> +} >> + >> static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, >> bool wait) >> { >> @@ -1045,6 +1080,8 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, >> list_del(&vm->vm_list_node); >> mutex_unlock(&process_info->lock); >> + vm->process_info = NULL; >> + >> /* Release per-process resources when last compute VM is destroyed */ >> if (!process_info->n_vms) { >> WARN_ON(!list_empty(&process_info->kfd_bo_list)); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> index 
6f60a581e3ba..16586651020f 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >> @@ -1307,6 +1307,10 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) >> if (abo->kfd_bo) >> amdgpu_amdkfd_unreserve_memory_limit(abo); >> + /* We only remove the fence if the resv has individualized. */ >> + if (bo->base.resv == &bo->base._resv) > > Should this be a WARN_ON? We expect this condition to be always true. If it's not, there should be a noisy warning that something is wrong. Good point. Thanks, Xinhui > > Regards, > Felix > > >> + amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); >> + >> if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node || >> !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) >> return; _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx