Properly mark kfd_process->ef as __rcu and consistently access it with rcu_dereference_protected. Reported-by: kernel test robot <lkp@xxxxxxxxx> Closes: https://lore.kernel.org/oe-kbuild-all/202312052245.yFpBSgNH-lkp@xxxxxxxxx/ Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 ++++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f2e920734c98..20cb266dcedd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -314,7 +314,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, - struct dma_fence **ef); + struct dma_fence __rcu **ef); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d91f99acb59..8ba6f6c8363d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2806,7 +2806,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) put_task_struct(usertask); } -static void replace_eviction_fence(struct dma_fence **ef, +static void replace_eviction_fence(struct dma_fence __rcu **ef, struct dma_fence *new_ef) { struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true @@ -2841,7 +2841,7 @@ static void replace_eviction_fence(struct dma_fence **ef, * 7. Add fence to all PD and PT BOs. * 8. Unreserve all BOs */ -int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef) { struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 45366b4ca976..5a24097a9f28 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -917,7 +917,7 @@ struct kfd_process { * fence will be triggered during eviction and new one will be created * during restore */ - struct dma_fence *ef; + struct dma_fence __rcu *ef; /* Work items for evicting and restoring BOs */ struct delayed_work eviction_work; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 71df51fcc1b0..14b11d61f8dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1110,6 +1110,8 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); + struct dma_fence *ef = rcu_dereference_protected(p->ef, + kref_read(&p->ref) == 0); kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); @@ -1118,7 +1120,7 @@ static void kfd_process_wq_release(struct work_struct *work) * destroyed. This allows any BOs to be freed without * triggering pointless evictions or waiting for fences. */ - dma_fence_signal(p->ef); + dma_fence_signal(ef); kfd_process_remove_sysfs(p); @@ -1127,7 +1129,7 @@ static void kfd_process_wq_release(struct work_struct *work) svm_range_list_fini(p); kfd_process_destroy_pdds(p); - dma_fence_put(p->ef); + dma_fence_put(ef); kfd_event_free_process(p); -- 2.34.1