Am 2021-10-15 um 2:54 a.m. schrieb Lang Yu: > Currently, all kfd BOs use same destruction routine. But pinned > BOs are not unpinned properly. Separate them from general routine. > > v2 (Felix): > Add safeguard to prevent user space from freeing signal BO. > Kunmap signal BO in the event of setting event page error. > Just kunmap signal BO to avoid duplicating the code. > > Signed-off-by: Lang Yu <lang.yu@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 ++ > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 31 +++-- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 110 +++++++++++++----- > 5 files changed, 119 insertions(+), 37 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 69de31754907..751557af09bb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -279,6 +279,8 @@ int amdgpu_amdkfd_gpuvm_sync_memory( > struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); > int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, > struct kgd_mem *mem, void **kptr, uint64_t *size); > +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem); > + > int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, > struct dma_fence **ef); > int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index cdf46bd0d8d5..4969763c2e47 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1871,6 +1871,16 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, > return ret; > } > > +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem) > +{ > + struct amdgpu_bo *bo = mem->bo; > + > + amdgpu_bo_reserve(bo, true); > + amdgpu_bo_kunmap(bo); > + amdgpu_bo_unpin(bo); > + amdgpu_bo_unreserve(bo); > +} > + > int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, > struct kfd_vm_fault_info *mem) > { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index f1e7edeb4e6b..9317a2e238d0 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -1011,11 +1011,6 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, > void *mem, *kern_addr; > uint64_t size; > > - if (p->signal_page) { > - pr_err("Event page is already set\n"); > - return -EINVAL; > - } > - > kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset)); > if (!kfd) { > pr_err("Getting device by id failed in %s\n", __func__); > @@ -1023,6 +1018,13 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, > } > > mutex_lock(&p->mutex); > + > + if (p->signal_page) { > + pr_err("Event page is already set\n"); > + err = -EINVAL; > + goto out_unlock; > + } > + > pdd = kfd_bind_process_to_device(kfd, p); > if (IS_ERR(pdd)) { > err = PTR_ERR(pdd); > @@ -1037,20 +1039,24 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, > err = -EINVAL; > goto out_unlock; > } > - mutex_unlock(&p->mutex); > > err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, > mem, &kern_addr, &size); > if (err) { > pr_err("Failed to map event page to kernel\n"); > - return err; > + goto out_unlock; > } > > err = kfd_event_page_set(p, kern_addr, size); > if (err) { > pr_err("Failed to set event page\n"); > - return err; > + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem); > + goto out_unlock; > } > + > + p->signal_handle = args->event_page_offset; > + > + mutex_unlock(&p->mutex); > } > > err = kfd_event_create(filp, p, args->event_type, > @@ -1368,6 +1374,15 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, > return -EINVAL; > > mutex_lock(&p->mutex); > + /* > + * Safeguard to prevent user space from freeing signal BO. > + * It will be freed at process termination. > + */ > + if (p->signal_handle && (p->signal_handle == args->handle)) { > + pr_err("Free signal BO is not allowed\n"); > + ret = -EPERM; > + goto err_unlock; > + } > > pdd = kfd_get_process_device_data(dev, p); > if (!pdd) { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 6d8f9bb2d905..30f08f1606bb 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -608,12 +608,14 @@ struct qcm_process_device { > uint32_t sh_hidden_private_base; > > /* CWSR memory */ > + struct kgd_mem *cwsr_mem; > void *cwsr_kaddr; > uint64_t cwsr_base; > uint64_t tba_addr; > uint64_t tma_addr; > > /* IB memory */ > + struct kgd_mem *ib_mem; > uint64_t ib_base; > void *ib_kaddr; > > @@ -808,6 +810,7 @@ struct kfd_process { > /* Event ID allocator and lookup */ > struct idr event_idr; > /* Event page */ > + u64 signal_handle; > struct kfd_signal_page *signal_page; > size_t signal_mapped_size; > size_t signal_event_count; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 21ec8a18cad2..26fc716a92c2 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep); > static void evict_process_worker(struct work_struct *work); > static void restore_process_worker(struct work_struct *work); > > +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd); > + > struct kfd_procfs_tree { > struct kobject *kobj; > }; > @@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void) > } > > static void kfd_process_free_gpuvm(struct kgd_mem *mem, > - struct kfd_process_device *pdd) > + struct kfd_process_device *pdd, void *kptr) > { > struct kfd_dev *dev = pdd->dev; > > + if (kptr) { > + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem); > + kptr = NULL; > + } > + > amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv); > amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv, > NULL); > @@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, > */ > static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, > uint64_t gpu_va, uint32_t size, > - uint32_t flags, void **kptr) > + uint32_t flags, struct kgd_mem **mem, void **kptr) > { > struct kfd_dev *kdev = pdd->dev; > - struct kgd_mem *mem = NULL; > - int handle; > int err; > > err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, > - pdd->drm_priv, &mem, NULL, flags); > + pdd->drm_priv, mem, NULL, flags); > if (err) > goto err_alloc_mem; > > - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, > + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem, > pdd->drm_priv, NULL); > if (err) > goto err_map_mem; > > - err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true); > + err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true); > if (err) { > pr_debug("Sync memory failed, wait interrupted by user signal\n"); > goto sync_memory_failed; > } > > - /* Create an obj handle so kfd_process_device_remove_obj_handle > - * will take care of the bo removal when the process finishes. > - * We do not need to take p->mutex, because the process is just > - * created and the ioctls have not had the chance to run. > - */ > - handle = kfd_process_device_create_obj_handle(pdd, mem); > - > - if (handle < 0) { > - err = handle; > - goto free_gpuvm; > - } > - > if (kptr) { > err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd, > - (struct kgd_mem *)mem, kptr, NULL); > + (struct kgd_mem *)*mem, kptr, NULL); > if (err) { > pr_debug("Map GTT BO to kernel failed\n"); > - goto free_obj_handle; > + goto sync_memory_failed; > } > } > > return err; > > -free_obj_handle: > - kfd_process_device_remove_obj_handle(pdd, handle); > -free_gpuvm: > sync_memory_failed: > - kfd_process_free_gpuvm(mem, pdd); > - return err; > + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv); > > err_map_mem: > - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv, > + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv, > NULL); > err_alloc_mem: > + *mem = NULL; > *kptr = NULL; > return err; > } > @@ -776,6 +766,7 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) > KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE | > KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | > KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; > + struct kgd_mem *mem; > void *kaddr; > int ret; > > @@ -784,15 +775,26 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) > > /* ib_base is only set for dGPU */ > ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags, > - &kaddr); > + &mem, &kaddr); > if (ret) > return ret; > > + qpd->ib_mem = mem; > qpd->ib_kaddr = kaddr; > > return 0; > } > > +static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd) > +{ > + struct qcm_process_device *qpd = &pdd->qpd; > + > + if (!qpd->ib_kaddr || !qpd->ib_base) > + return; > + > + kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr); > +} > + > struct kfd_process *kfd_create_process(struct file *filep) > { > struct kfd_process *process; > @@ -947,6 +949,38 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) > } > } > > +/* > + * Just kunmap and unpin signal BO here. It will be freed in > + * kfd_process_free_outstanding_kfd_bos() > + */ > +static void kfd_process_kunmap_signal_bo(struct kfd_process *p) > +{ > + struct kfd_process_device *pdd; > + struct kfd_dev *kdev; > + void *mem; > + int i; > + > + kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle)); > + if (!kdev) > + return; > + > + mutex_lock(&p->mutex); > + > + pdd = kfd_get_process_device_data(kdev, p); > + if (!pdd) > + goto out; > + > + mem = kfd_process_device_translate_handle( > + pdd, GET_IDR_HANDLE(p->signal_handle)); > + if (!mem) > + goto out; > + > + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem); > + > +out: > + mutex_unlock(&p->mutex); > +} > + > static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p) > { > int i; > @@ -965,6 +999,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) > pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n", > pdd->dev->id, p->pasid); > > + kfd_process_device_destroy_cwsr_dgpu(pdd); > + kfd_process_device_destroy_ib_mem(pdd); > + > if (pdd->drm_file) { > amdgpu_amdkfd_gpuvm_release_process_vm( > pdd->dev->kgd, pdd->drm_priv); > @@ -1049,9 +1086,11 @@ static void kfd_process_wq_release(struct work_struct *work) > { > struct kfd_process *p = container_of(work, struct kfd_process, > release_work); > + > kfd_process_remove_sysfs(p); > kfd_iommu_unbind_process(p); > > + kfd_process_kunmap_signal_bo(p); > kfd_process_free_outstanding_kfd_bos(p); > svm_range_list_fini(p); > > @@ -1198,6 +1237,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) > uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT > | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE > | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; > + struct kgd_mem *mem; > void *kaddr; > int ret; > > @@ -1206,10 +1246,11 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) > > /* cwsr_base is only set for dGPU */ > ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base, > - KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr); > + KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr); > if (ret) > return ret; > > + qpd->cwsr_mem = mem; > qpd->cwsr_kaddr = kaddr; > qpd->tba_addr = qpd->cwsr_base; > > @@ -1222,6 +1263,17 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) > return 0; > } > > +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd) > +{ > + struct kfd_dev *dev = pdd->dev; > + struct qcm_process_device *qpd = &pdd->qpd; > + > + if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) > + return; > + > + kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr); > +} > + > void kfd_process_set_trap_handler(struct qcm_process_device *qpd, > uint64_t tba_addr, > uint64_t tma_addr)