Am 2021-05-29 um 6:51 p.m. schrieb Eric Huang: > 1. conditionally flush TLBs after map. > 2. add heavy weight TLBs flushing after unmap. > > Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 24 +++++++++++-------- > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++--- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++-- > 8 files changed, 25 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 2560977760b3..997258c24ef2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -85,6 +85,7 @@ struct kgd_mem { > > bool aql_queue; > bool is_imported; > + bool table_freed; > }; > > /* KFD Memory Eviction */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 385c33675227..8ac0d849fd3f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1132,6 +1132,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem, > return ret; > } > > + mem->table_freed = bo_va->table_freed; > + I think this should be: mem->table_freed = mem->table_freed || bo_va->table_freed; That way, on a multi-GPU system, mem->table_freed gets set to true if any GPU freed a page table. It would then need to be reset to false somewhere. However, that means that if one GPU frees a page table, all GPUs need to flush, which may be unnecessary. 
A better alternative would be to do the TLB flushing right here, only for the affected GPU, instead of returning an aggregated "table_freed" all the way back to kfd_ioctl_map_memory_to_gpu, which flushes all GPUs. Finally, bo_va->table_freed is only used once, after the amdgpu_vm_bo_update call returns, so there is no good reason to store it permanently in the bo_va structure. It would be better to just add an output parameter to amdgpu_vm_bo_update. Regards, Felix > return amdgpu_sync_fence(sync, bo_va->last_pt_update); > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index 36e7f088d4ee..0e0f27f779cd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -87,6 +87,7 @@ struct amdgpu_bo_va { > bool cleared; > > bool is_xgmi; > + bool table_freed; > }; > > struct amdgpu_bo { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 95b94c95adac..ff3eb8395017 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -1885,7 +1885,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, > resv, mapping->start, > mapping->last, update_flags, > mapping->offset, mem, > - pages_addr, last_update, NULL, > + pages_addr, last_update, &bo_va->table_freed, > vram_base_offset); > if (r) > return r; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index 960913a35ee4..c45ccd1d03c0 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -1658,16 +1658,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, > } > > /* Flush TLBs after waiting for the page table updates to complete */ > - for (i = 0; i < args->n_devices; i++) { > - peer = kfd_device_by_id(devices_arr[i]); > - if (WARN_ON_ONCE(!peer)) > - continue; > - peer_pdd = 
kfd_get_process_device_data(peer, p); > - if (WARN_ON_ONCE(!peer_pdd)) > - continue; > - if (!amdgpu_read_lock(peer->ddev, true)) { > - kfd_flush_tlb(peer_pdd); > - amdgpu_read_unlock(peer->ddev); > + if (((struct kgd_mem *)mem)->table_freed) { > + for (i = 0; i < args->n_devices; i++) { > + peer = kfd_device_by_id(devices_arr[i]); > + if (WARN_ON_ONCE(!peer)) > + continue; > + peer_pdd = kfd_get_process_device_data(peer, p); > + if (WARN_ON_ONCE(!peer_pdd)) > + continue; > + if (!amdgpu_read_lock(peer->ddev, true)) { > + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); > + amdgpu_read_unlock(peer->ddev); > + } > } > } > > @@ -1766,6 +1768,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, > amdgpu_read_unlock(peer->ddev); > goto unmap_memory_from_gpu_failed; > } > + ((struct kgd_mem *)mem)->table_freed = false; > + kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); > amdgpu_read_unlock(peer->ddev); > args->n_success = i+1; > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index c1bea1f7627b..a4920bc5cfbc 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, > qpd->vmid, > qpd->page_table_base); > /* invalidate the VM context after pasid and vmid mapping is set up */ > - kfd_flush_tlb(qpd_to_pdd(qpd)); > + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); > > if (dqm->dev->kfd2kgd->set_scratch_backing_va) > dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, > @@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, > if (flush_texture_cache_nocpsch(q->device, qpd)) > pr_err("Failed to flush TC\n"); > > - kfd_flush_tlb(qpd_to_pdd(qpd)); > + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); > > /* Release the vmid mapping */ > set_pasid_vmid_mapping(dqm, 0, qpd->vmid); > @@ -885,7 +885,7 @@ 
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, > dqm->dev->kgd, > qpd->vmid, > qpd->page_table_base); > - kfd_flush_tlb(pdd); > + kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); > } > > /* Take a safe reference to the mm_struct, which may otherwise > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index ecdd5e782b81..edce3ecf207d 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev); > > void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid); > > -void kfd_flush_tlb(struct kfd_process_device *pdd); > +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); > > int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 4ab9da288f90..a03373743a3d 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -2161,7 +2161,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, > KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); > } > > -void kfd_flush_tlb(struct kfd_process_device *pdd) > +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) > { > struct kfd_dev *dev = pdd->dev; > > @@ -2174,7 +2174,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd) > pdd->qpd.vmid); > } else { > amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, > - pdd->process->pasid, TLB_FLUSH_LEGACY); > + pdd->process->pasid, type); > } > } > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx