Am 2021-05-12 um 2:40 p.m. schrieb philip yang: > > > On 2021-05-12 11:54 a.m., Felix Kuehling wrote: >> Am 2021-05-12 um 8:38 a.m. schrieb Christian König: >>> Am 12.05.21 um 14:34 schrieb Philip Yang: >>>> Mapping huge page, 2MB aligned address with 2MB size, uses PDE0 as PTE. >>>> If previously valid PDE0, PDE0.V=1 and PDE0.P=0 turns into PTE, this >>>> requires TLB flush, otherwise page table walker will not read updated >>>> PDE0. >>>> >>>> Change page table update mapping to return free_table flag to indicate >>>> the previously valid PDE may have turned into a PTE if page table is >>>> freed. >>>> >>>> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++++++++++------ >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- >>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 ++++++++++-- >>>> 3 files changed, 22 insertions(+), 9 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> index 3dcdcc9ff522..27418f9407f1 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> @@ -1457,7 +1457,7 @@ static void amdgpu_vm_fragment(struct >>>> amdgpu_vm_update_params *params, >>>> */ >>>> static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params >>>> *params, >>>> uint64_t start, uint64_t end, >>>> - uint64_t dst, uint64_t flags) >>>> + uint64_t dst, uint64_t flags, bool *free_table) >>> Please put that flag into the params structure instead. >>> >>> Christian. >> I agree. Christian, I think we also need to keep track of this for >> graphics command submissions. amdgpu_cs_vm_handling needs get this flag >> from amdgpu_vm_bo_update, and amdgpu_cs_submit needs to update >> job->vm_needs_flushed based on this. > > amdgpu_vm_bo_update to pass NULL to ignore the free_table return flag. > > This new flag only used by SVM APIs. Old KFD and graphics command > submission path is not changed. > Understood. I'm pointing out that the same problem we're fixing for SVM may also affect graphics. So a follow-up patch may want to implement this conditional TLB flush for graphics command submissions as well. That would mean adding a "free_table" output parameter to amdgpu_vm_bo_update as well, and handling it in the amdgpu_cs code. Regards, Felix > Thanks, > > Philip > >> Regards, >> Felix >> >> >>>> { >>>> struct amdgpu_device *adev = params->adev; >>>> struct amdgpu_vm_pt_cursor cursor; >>>> @@ -1583,6 +1583,8 @@ static int amdgpu_vm_update_ptes(struct >>>> amdgpu_vm_update_params *params, >>>> while (cursor.pfn < frag_start) { >>>> amdgpu_vm_free_pts(adev, params->vm, &cursor); >>>> amdgpu_vm_pt_next(adev, &cursor); >>>> + if (free_table) >>>> + *free_table = true; >>>> } >>>> } else if (frag >= shift) { >>>> @@ -1610,6 +1612,7 @@ static int amdgpu_vm_update_ptes(struct >>>> amdgpu_vm_update_params *params, >>>> * @nodes: array of drm_mm_nodes with the MC addresses >>>> * @pages_addr: DMA addresses to use for mapping >>>> * @fence: optional resulting fence >>>> + * @free_table: return true if page table is freed >>>> * >>>> * Fill in the page table entries between @start and @last. >>>> * >>>> @@ -1624,7 +1627,8 @@ int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> uint64_t flags, uint64_t offset, >>>> struct drm_mm_node *nodes, >>>> dma_addr_t *pages_addr, >>>> - struct dma_fence **fence) >>>> + struct dma_fence **fence, >>>> + bool *free_table) >>>> { >>>> struct amdgpu_vm_update_params params; >>>> enum amdgpu_sync_mode sync_mode; >>>> @@ -1721,7 +1725,8 @@ int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> } >>>> tmp = start + num_entries; >>>> - r = amdgpu_vm_update_ptes(¶ms, start, tmp, addr, flags); >>>> + r = amdgpu_vm_update_ptes(¶ms, start, tmp, addr, flags, >>>> + free_table); >>>> if (r) >>>> goto error_unlock; >>>> @@ -1879,7 +1884,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>>> *adev, struct amdgpu_bo_va *bo_va, >>>> resv, mapping->start, >>>> mapping->last, update_flags, >>>> mapping->offset, nodes, >>>> - pages_addr, last_update); >>>> + pages_addr, last_update, NULL); >>>> if (r) >>>> return r; >>>> } >>>> @@ -2090,7 +2095,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device >>>> *adev, >>>> r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, >>>> resv, mapping->start, >>>> mapping->last, init_pte_value, >>>> - 0, NULL, NULL, &f); >>>> + 0, NULL, NULL, &f, NULL); >>>> amdgpu_vm_free_mapping(adev, vm, mapping, f); >>>> if (r) { >>>> dma_fence_put(f); >>>> @@ -3428,7 +3433,7 @@ bool amdgpu_vm_handle_fault(struct >>>> amdgpu_device *adev, u32 pasid, >>>> } >>>> r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, >>>> NULL, addr, >>>> - addr, flags, value, NULL, NULL, >>>> + addr, flags, value, NULL, NULL, NULL, >>>> NULL); >>>> if (r) >>>> goto error_unlock; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> index ea60ec122b51..f61c20b70b79 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> @@ -404,7 +404,7 @@ int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> uint64_t flags, uint64_t offset, >>>> struct drm_mm_node *nodes, >>>> dma_addr_t *pages_addr, >>>> - struct dma_fence **fence); >>>> + struct dma_fence **fence, bool *free_table); >>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev, >>>> struct amdgpu_bo_va *bo_va, >>>> bool clear); >>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> index b665e9ff77e3..4f28052d44bf 100644 >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> @@ -1084,7 +1084,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device >>>> *adev, struct amdgpu_vm *vm, >>>> return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>>> true, NULL, >>>> start, last, init_pte_value, 0, >>>> - NULL, NULL, fence); >>>> + NULL, NULL, fence, NULL); >>>> } >>>> static int >>>> @@ -1137,12 +1137,15 @@ svm_range_map_to_gpu(struct amdgpu_device >>>> *adev, struct amdgpu_vm *vm, >>>> struct amdgpu_device *bo_adev, struct dma_fence **fence) >>>> { >>>> struct amdgpu_bo_va bo_va; >>>> + bool free_table = false; >>>> + struct kfd_process *p; >>>> uint64_t pte_flags; >>>> int r = 0; >>>> pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, >>>> prange->start, >>>> prange->last); >>>> + p = container_of(prange->svms, struct kfd_process, svms); >>>> if (prange->svm_bo && prange->ttm_res) { >>>> bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); >>>> prange->mapping.bo_va = &bo_va; >>>> @@ -1159,7 +1162,8 @@ svm_range_map_to_gpu(struct amdgpu_device >>>> *adev, struct amdgpu_vm *vm, >>>> prange->mapping.offset, >>>> prange->ttm_res ? >>>> prange->ttm_res->mm_node : NULL, >>>> - dma_addr, &vm->last_update); >>>> + dma_addr, &vm->last_update, >>>> + &free_table); >>>> if (r) { >>>> pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); >>>> goto out; >>>> @@ -1175,6 +1179,10 @@ svm_range_map_to_gpu(struct amdgpu_device >>>> *adev, struct amdgpu_vm *vm, >>>> if (fence) >>>> *fence = dma_fence_get(vm->last_update); >>>> + if (free_table) >>>> + amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, >>>> + p->pasid); >>>> + >>>> out: >>>> prange->mapping.bo_va = NULL; >>>> return r; >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx@xxxxxxxxxxxxxxxxxxxxx >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx